From dfc74e98aa34d7c3a92df609849b3c0aa8ecf224 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 1 Jun 2025 00:35:48 +0200 Subject: [PATCH 01/15] GCC and Clang compatible on Windows --- cmake/config.cmake | 10 ++++++++-- cmake/server.cmake | 3 +++ public/client/TracyProfiler.cpp | 24 +++++++++++++++--------- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index d563c225a1..58e815ec98 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -12,7 +12,11 @@ if (NOT NO_ISA_EXTENSIONS) endif() endif() if(WIN32) - add_compile_options(/arch:AVX2) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") + add_compile_options(-mavx2) + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/arch:AVX2) + endif() endif() endif() @@ -30,7 +34,9 @@ endif() if(WIN32) add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR) - add_compile_options(/MP) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/MP) + endif() endif() if(EMSCRIPTEN) diff --git a/cmake/server.cmake b/cmake/server.cmake index a76d1c1340..cc3a9a924a 100644 --- a/cmake/server.cmake +++ b/cmake/server.cmake @@ -30,6 +30,9 @@ list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/") add_library(TracyServer STATIC EXCLUDE_FROM_ALL ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES}) target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR}) target_link_libraries(TracyServer PUBLIC TracyCapstone libzstd PPQSort::PPQSort) +if(WIN32) + target_link_libraries(TracyServer PRIVATE Ws2_32) +endif() if(NO_STATISTICS) target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS) endif() diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 6fe7868093..4f6e31f734 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -1511,7 +1511,7 @@ void Profiler::InstallCrashHandler() #if defined _WIN32 && !defined TRACY_UWP && 
!defined TRACY_NO_CRASH_HANDLER // We cannot use Vectored Exception handling because it catches application-wide frame-based SEH blocks. We only // want to catch unhandled exceptions. - m_prevHandler = SetUnhandledExceptionFilter( CrashFilter ); + m_prevHandler = (void *)SetUnhandledExceptionFilter( CrashFilter ); #endif #ifndef TRACY_NO_CRASH_HANDLER @@ -3122,14 +3122,20 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); #ifdef _WIN32 - __try - { - memcpy( buf, data, size ); - } - __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) - { - success = false; - } + + #if defined(__clang__) || defined(__GNUC__) + // memory size pointed to by buf variable is checked above + memcpy( buf, data, size ); + #elif defined(_MSC_VER) + __try + { + memcpy( buf, data, size ); + } + __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) + { + success = false; + } + #endif #else // Send through the pipe to ensure safe reads for( size_t offset = 0; offset != size; /*in loop*/ ) From a6b942d54459a7422e74ef980fc080ffcc638a8f Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 18:20:13 +0200 Subject: [PATCH 02/15] Compilation of profiler and project that uses Tracy now works with gcc and clang compilers on Windows. 
--- cmake/config.cmake | 5 +- import/src/import-chrome.cpp | 4 +- import/src/import-fuchsia.cpp | 5 +- import/src/json.hpp | 18 +- profiler/src/main.cpp | 8 + profiler/src/profiler/TracyView_Timeline.cpp | 8 + profiler/src/stb_image.h | 363 ++++++++++--------- public/TracyClient.cpp | 1 + public/client/TracyProfiler.cpp | 150 ++++++-- public/client/TracyProfiler.hpp | 4 + public/client/tracy_concurrentqueue.h | 6 +- public/common/TracyForceInline.hpp | 4 +- public/common/TracySocket.cpp | 1 + public/common/tracy_lz4.cpp | 10 +- public/common/tracy_lz4.hpp | 2 +- public/common/tracy_lz4hc.cpp | 3 + server/TracyFileRead.hpp | 3 +- server/TracyPrint.cpp | 2 +- server/TracyPrint.hpp | 2 +- server/tracy_robin_hood.h | 1 + server/tracy_xxhash.h | 52 ++- test/stb_image.h | 17 +- 22 files changed, 404 insertions(+), 265 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 58e815ec98..ad238cae4c 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -44,7 +44,10 @@ if(EMSCRIPTEN) endif() if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) + # Mingw gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp + if(NOT (MINGW OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang"))) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) + endif() endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") diff --git a/import/src/import-chrome.cpp b/import/src/import-chrome.cpp index 936d37d936..5a3040c338 100644 --- a/import/src/import-chrome.cpp +++ b/import/src/import-chrome.cpp @@ -11,9 +11,11 @@ #include #include -#ifdef _MSC_VER +#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +// all checked compilers contain _stat64 # define stat64 _stat64 #endif + #if 
defined __APPLE__ # define stat64 stat #endif diff --git a/import/src/import-fuchsia.cpp b/import/src/import-fuchsia.cpp index 785979c198..854273b2cd 100644 --- a/import/src/import-fuchsia.cpp +++ b/import/src/import-fuchsia.cpp @@ -20,8 +20,9 @@ #include #include -#ifdef _MSC_VER -#define stat64 _stat64 +#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +// all checked compilers contain _stat64 +# define stat64 _stat64 #endif #if defined __APPLE__ #define stat64 stat diff --git a/import/src/json.hpp b/import/src/json.hpp index 8b72ea6539..cca1a99925 100644 --- a/import/src/json.hpp +++ b/import/src/json.hpp @@ -16799,7 +16799,7 @@ class binary_writer void write_compact_float(const number_float_t n, detail::input_format_t format) { -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -16819,7 +16819,7 @@ class binary_writer : get_msgpack_float_prefix(n)); write_number(n); } -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic pop #endif } @@ -17981,7 +17981,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '-'; } -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -17993,7 +17993,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '0'; return first; } -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic pop #endif @@ -21187,7 +21187,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::negation>, detail::negation>, detail::negation>>, -#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) +#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || defined(__clang__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) detail::negation>, #endif #if 
defined(JSON_HAS_CPP_17) && JSON_HAS_STATIC_RTTI @@ -22983,13 +22983,13 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ bool operator==(const_reference rhs) const noexcept { -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif const_reference lhs = *this; JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic pop #endif } @@ -23087,12 +23087,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ friend bool operator==(const_reference lhs, const_reference rhs) noexcept { -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic pop #endif } diff --git a/profiler/src/main.cpp b/profiler/src/main.cpp index b10a668003..0b6d34cae8 100644 --- a/profiler/src/main.cpp +++ b/profiler/src/main.cpp @@ -19,6 +19,14 @@ # include #endif +#if defined _WIN32 && (defined __GNUC__ && !defined __clang__) +//_WIN32 - only tested on windows + +// gcc throws error for not present std::pow function, +// clang does not have that problem +#include +#endif + #define STB_IMAGE_IMPLEMENTATION #define STBI_ONLY_PNG #include "stb_image.h" diff --git a/profiler/src/profiler/TracyView_Timeline.cpp b/profiler/src/profiler/TracyView_Timeline.cpp index 5200fd2c6e..50186f6f68 100644 --- a/profiler/src/profiler/TracyView_Timeline.cpp +++ b/profiler/src/profiler/TracyView_Timeline.cpp @@ -10,6 +10,14 @@ #include "TracyTimelineItemThread.hpp" #include "TracyView.hpp" +#if defined _WIN32 && (defined __GNUC__ && !defined __clang__) +//_WIN32 - 
only tested on windows + +// gcc throws error for not present std::pow function, +// clang does not have that problem +#include +#endif + namespace tracy { diff --git a/profiler/src/stb_image.h b/profiler/src/stb_image.h index a632d54351..d0c064bb36 100644 --- a/profiler/src/stb_image.h +++ b/profiler/src/stb_image.h @@ -1,4 +1,4 @@ -/* stb_image - v2.29 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb no warranty implied; use at your own risk Do this: @@ -48,7 +48,6 @@ LICENSE RECENT REVISION HISTORY: - 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes @@ -631,7 +630,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #endif #ifndef STBI_THREAD_LOCAL - #if defined(__GNUC__) + #if defined(__GNUC__) && !defined(__clang__) #define STBI_THREAD_LOCAL __thread #endif #endif @@ -659,12 +658,15 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #define STBI_NOTUSED(v) (void)sizeof(v) #endif -#ifdef _MSC_VER -#define STBI_HAS_LROTL -#endif -#ifdef STBI_HAS_LROTL +#ifdef _MSC_VER #define stbi_lrot(x,y) _lrotl(x,y) +#elif defined __clang__ + // 32bit version of function as stb image uses this function to rotate 32bit integers + #define stbi_lrot(x,y) __builtin_rotateleft32(x,y) +#elif defined __GNUC__ + // gcc built-in is type-generic with first argument being any unsigned integer and second any signed or unsigned integer or char + #define stbi_lrot(x,y) __builtin_stdc_rotate_left(x,y) #else #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif @@ -726,7 +728,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; #ifdef _MSC_VER -#if _MSC_VER >= 1400 // not VC6 +#if defined _MSC_VER && _MSC_VER >= 1400 // not VC6 #include // __cpuid static int stbi__cpuid3(void) { @@ -1073,8 +1075,8 @@ static int stbi__addints_valid(int a, int b) return a <= INT_MAX - b; } -// returns 1 if the product of two ints fits in a signed short, 0 on overflow. -static int stbi__mul2shorts_valid(int a, int b) +// returns 1 if the product of two signed shorts is valid, 0 on overflow. +static int stbi__mul2shorts_valid(short a, short b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid @@ -3385,13 +3387,13 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) return 1; } -static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { - stbi_uc x = stbi__get8(j->s); - while (x == 0xff) { // might be a marker + int x = stbi__get8(j->s); + while (x == 255) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { @@ -4177,7 +4179,6 @@ typedef struct { stbi_uc *zbuffer, *zbuffer_end; int num_bits; - int hit_zeof_once; stbi__uint32 code_buffer; char *zout; @@ -4244,20 +4245,9 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) int b,s; if (a->num_bits < 16) { if (stbi__zeof(a)) { - if (!a->hit_zeof_once) { - // This is the first time we hit eof, insert 16 extra padding btis - // to allow us to keep going; if we actually consume any of them - // though, that is invalid data. This is caught later. 
- a->hit_zeof_once = 1; - a->num_bits += 16; // add 16 implicit zero bits - } else { - // We already inserted our extra 16 padding bits and are again - // out, this stream is actually prematurely terminated. - return -1; - } - } else { - stbi__fill_bits(a); + return -1; /* report error for unexpected end of data. */ } + stbi__fill_bits(a); } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { @@ -4322,13 +4312,6 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) int len,dist; if (z == 256) { a->zout = zout; - if (a->hit_zeof_once && a->num_bits < 16) { - // The first time we hit zeof, we inserted 16 extra zero bits into our bit - // buffer so the decoder can just do its speculative decoding. But if we - // actually consumed any of those bits (which is the case when num_bits < 16), - // the stream actually read past the end so it is malformed. - return stbi__err("unexpected end","Corrupt PNG"); - } return 1; } if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data @@ -4340,7 +4323,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (len > a->zout_end - zout) { + if (zout + len > a->zout_end) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } @@ -4484,7 +4467,6 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; - a->hit_zeof_once = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); @@ -4640,8 +4622,9 @@ enum { STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, - // synthetic filter used for first scanline to avoid needing a dummy row of 0s - STBI__F_avg_first + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + 
STBI__F_paeth_first }; static stbi_uc first_row_filter[5] = @@ -4650,56 +4633,29 @@ static stbi_uc first_row_filter[5] = STBI__F_sub, STBI__F_none, STBI__F_avg_first, - STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub + STBI__F_paeth_first }; static int stbi__paeth(int a, int b, int c) { - // This formulation looks very different from the reference in the PNG spec, but is - // actually equivalent and has favorable data dependencies and admits straightforward - // generation of branch-free code, which helps performance significantly. - int thresh = c*3 - (a + b); - int lo = a < b ? a : b; - int hi = a < b ? b : a; - int t0 = (hi <= thresh) ? lo : c; - int t1 = (thresh <= lo) ? hi : t0; - return t1; + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; } static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; -// adds an extra all-255 alpha channel -// dest == src is legal -// img_n must be 1 or 3 -static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) -{ - int i; - // must process data backwards since we allow dest==src - if (img_n == 1) { - for (i=x-1; i >= 0; --i) { - dest[i*2+1] = 255; - dest[i*2+0] = src[i]; - } - } else { - STBI_ASSERT(img_n == 3); - for (i=x-1; i >= 0; --i) { - dest[i*4+3] = 255; - dest[i*4+2] = src[i*3+2]; - dest[i*4+1] = src[i*3+1]; - dest[i*4+0] = src[i*3+0]; - } - } -} - // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) { - int bytes = (depth == 16 ? 2 : 1); + int bytes = (depth == 16? 
2 : 1); stbi__context *s = a->s; stbi__uint32 i,j,stride = x*out_n*bytes; stbi__uint32 img_len, img_width_bytes; - stbi_uc *filter_buf; - int all_ok = 1; int k; int img_n = s->img_n; // copy it into a local for later @@ -4711,11 +4667,8 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into if (!a->out) return stbi__err("outofmem", "Out of memory"); - // note: error exits here don't need to clean up a->out individually, - // stbi__do_png always does on error. if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); - if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); img_len = (img_width_bytes + 1) * y; // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, @@ -4723,137 +4676,189 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r // so just check for raw_len < img_len always. if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); - // Allocate two scan lines worth of filter workspace buffer. 
- filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); - if (!filter_buf) return stbi__err("outofmem", "Out of memory"); - - // Filtering for low-bit-depth images - if (depth < 8) { - filter_bytes = 1; - width = img_width_bytes; - } - for (j=0; j < y; ++j) { - // cur/prior filter buffers alternate - stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; - stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; - stbi_uc *dest = a->out + stride*j; - int nk = width * filter_bytes; + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; int filter = *raw++; - // check filter type - if (filter > 4) { - all_ok = stbi__err("invalid filter","Corrupt PNG"); - break; + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; - // perform actual filtering - switch (filter) { - case STBI__F_none: - memcpy(cur, raw, nk); - break; - case STBI__F_sub: - memcpy(cur, raw, filter_bytes); - for (k = filter_bytes; k < nk; ++k) - cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); - break; - case STBI__F_up: - for (k = 0; k < nk; ++k) - cur[k] = STBI__BYTECAST(raw[k] + prior[k]); - break; - case STBI__F_avg: - for (k = 0; k < filter_bytes; ++k) - cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); - for (k = filter_bytes; k < nk; ++k) - cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); - break; - case STBI__F_paeth: - for (k = 0; k < filter_bytes; ++k) - cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) - for (k = filter_bytes; k < nk; ++k) - cur[k] = 
STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); - break; - case STBI__F_avg_first: - memcpy(cur, raw, filter_bytes); - for (k = filter_bytes; k < nk; ++k) - cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); - break; + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } } - raw += nk; + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } - // expand decoded bits in cur to dest, also adding an extra alpha channel if desired - if (depth < 8) { + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
+ case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. 
+ if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range - stbi_uc *in = cur; - stbi_uc *out = dest; - stbi_uc inb = 0; - stbi__uint32 nsmp = x*img_n; - // expand bits to bytes first + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
+ // so we need to explicitly clamp the final ones + if (depth == 4) { - for (i=0; i < nsmp; ++i) { - if ((i & 1) == 0) inb = *in++; - *out++ = scale * (inb >> 4); - inb <<= 4; + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); } + if (k > 0) *cur++ = scale * ((*in >> 4) ); } else if (depth == 2) { - for (i=0; i < nsmp; ++i) { - if ((i & 3) == 0) inb = *in++; - *out++ = scale * (inb >> 6); - inb <<= 2; + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); } - } else { - STBI_ASSERT(depth == 1); - for (i=0; i < nsmp; ++i) { - if ((i & 7) == 0) inb = *in++; - *out++ = scale * (inb >> 7); - inb <<= 1; + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } - - // insert alpha=255 values if desired - if (img_n != out_n) - stbi__create_png_alpha_expand8(dest, dest, x, img_n); - } else if (depth == 8) { - if (img_n == out_n) - memcpy(dest, cur, x*img_n); - else - stbi__create_png_alpha_expand8(dest, cur, x, img_n); - } else if (depth == 16) { - // convert the image data 
from big-endian to platform-native - stbi__uint16 *dest16 = (stbi__uint16*)dest; - stbi__uint32 nsmp = x*img_n; - - if (img_n == out_n) { - for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) - *dest16 = (cur[0] << 8) | cur[1]; - } else { - STBI_ASSERT(img_n+1 == out_n); + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; if (img_n == 1) { - for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { - dest16[0] = (cur[0] << 8) | cur[1]; - dest16[1] = 0xffff; + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; } } else { STBI_ASSERT(img_n == 3); - for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { - dest16[0] = (cur[0] << 8) | cur[1]; - dest16[1] = (cur[2] << 8) | cur[3]; - dest16[2] = (cur[4] << 8) | cur[5]; - dest16[3] = 0xffff; + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; } } } } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. 
+ stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } } - STBI_FREE(filter_buf); - if (!all_ok) return 0; - return 1; } diff --git a/public/TracyClient.cpp b/public/TracyClient.cpp index 6224f48bfe..69a5a6df70 100644 --- a/public/TracyClient.cpp +++ b/public/TracyClient.cpp @@ -51,6 +51,7 @@ #endif #ifdef _MSC_VER +// when gcc and clang linker options will be used # pragma comment(lib, "ws2_32.lib") # pragma comment(lib, "dbghelp.lib") # pragma comment(lib, "advapi32.lib") diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 4f6e31f734..359b1f478d 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -10,6 +10,9 @@ # include # include # include "../common/TracyUwp.hpp" +# if defined __clang__ || defined __GNUC__ +# include +# endif #else # include # include @@ -99,7 +102,7 @@ # define TRACY_DELAYED_INIT # endif #else -# ifdef __GNUC__ +# if defined __GNUC__ || defined __clang__ # define init_order( val ) __attribute__ ((init_priority(val))) # else # define init_order(x) @@ -112,6 +115,12 @@ extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); extern "C" typedef char* (WINAPI *t_WineGetVersion)(); extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); + +# if defined __clang__ || defined __GNUC__ + // _WIN32 +# include +#endif + #else # include # include @@ -292,6 +301,10 @@ static bool EnsureReadable( uintptr_t address ) } #endif +#if defined __linux__ + bool +#endif + #ifndef TRACY_DELAYED_INIT struct InitTimeWrapper @@ -1443,6 +1456,12 @@ Profiler::Profiler() s_token_detail = moodycamel::ProducerToken( s_queue ); s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; s_threadHandle = ThreadHandleWrapper 
{ m_mainThread }; +# else + //#error FilipNur check if works + // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. + s_token_detail = moodycamel::ProducerToken( s_queue ); + s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; + s_threadHandle = ThreadHandleWrapper { m_mainThread }; # endif #endif @@ -1471,7 +1490,36 @@ Profiler::Profiler() m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); -#ifndef _WIN32 +#if defined _WIN32 && (defined __clang__ || defined __GNUC__) + + m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; + + { // scope for temporary variable originalHandlesCount + int originalHandlesCount = _getmaxstdio(); + + while(_pipe(m_pipe, m_pipeBufSize, _O_BINARY) != 0) + { + if ((errno == EMFILE) || (errno == ENFILE)) + { + // safe upper bound for exceptional situations + if(_getmaxstdio() > (originalHandlesCount + 10)) + { + throw std::runtime_error("Failed to create communication pipe!"); + } + + // as described by Raymond Chen (https://devblogs.microsoft.com/oldnewthing/20070718-00/?p=25963) + // max number of handles in windows is 10000, + // _getmaxstdio() at the start returns 512, so no fear of too much handles + _setmaxstdio(_getmaxstdio() + 1); + } + else + { + m_pipeBufSize /= 2; + } + } + } + +#elif !defined _WIN32 pipe(m_pipe); # if defined __APPLE__ || defined BSD // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default @@ -1636,6 +1684,10 @@ Profiler::~Profiler() #ifndef _WIN32 close( m_pipe[0] ); close( m_pipe[1] ); +#elif defined __clang__ || defined __GNUC__ + // _WIN32 + _close(m_pipe[0]); + _close(m_pipe[1]); #endif tracy_free( m_safeSendBuffer ); @@ -3121,21 +3173,45 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); -#ifdef _WIN32 - - #if defined(__clang__) || defined(__GNUC__) - // memory size pointed to by buf variable is checked above +#if defined _WIN32 && 
defined _MSC_VER + __try + { memcpy( buf, data, size ); - #elif defined(_MSC_VER) - __try + } + __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) + { + success = false; + } + +#elif defined _WIN32 && (defined __clang__ || defined __GNUC__) + // Send through the pipe to ensure safe reads on compilers with no __try/__except + for( size_t offset = 0; offset != size; /*in loop*/ ) + { + size_t sendsize = size - offset; + int result1, result2; + + // ENOSPC indicates that there is no more space to execute write operation + // other possible values: + // EBADF - invalid file descriptor or not opened for writing + // EINVAL - null buffer or odd number of bytes in unicode mode + while( ( result1 = _write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno != ENOSPC ) { /* retry */ } + if( result1 < 0 ) { - memcpy( buf, data, size ); + success = false; + break; } - __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) + + // EBADF - errno set to this value if pipe is not opened for reading or locked + // other possible values: + // EINVAL - result1 > INT_MAX + while( ( result2 = _read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno != EBADF ) { /* retry */ } + if( result2 != result1 ) { success = false; + break; } - #endif + offset += result1; + } #else // Send through the pipe to ensure safe reads for( size_t offset = 0; offset != size; /*in loop*/ ) @@ -3473,32 +3549,32 @@ void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) case SymbolQueueItemType::KernelCode: { #ifdef _WIN32 - auto mod = GetKernelModulePath( si.ptr ); - if( mod ) - { - auto fn = DecodeCallstackPtrFast( si.ptr ); - if( *fn ) - { - auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); - if( hnd ) - { - auto ptr = (const void*)GetProcAddress( hnd, fn ); - if( ptr ) - { - auto buf = (char*)tracy_malloc( si.extra ); - memcpy( buf, ptr, si.extra ); - FreeLibrary( hnd ); - TracyLfqPrepare( QueueType::SymbolCodeMetadata ); - MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); - MemWrite( 
&item->symbolCodeMetadata.ptr, (uint64_t)buf ); - MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); - TracyLfqCommit; - break; - } - FreeLibrary( hnd ); - } - } - } + auto mod = GetKernelModulePath( si.ptr ); + if( mod ) + { + auto fn = DecodeCallstackPtrFast( si.ptr ); + if( *fn ) + { + auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); + if( hnd ) + { + auto ptr = (const void*)GetProcAddress( hnd, fn ); + if( ptr ) + { + auto buf = (char*)tracy_malloc( si.extra ); + memcpy( buf, ptr, si.extra ); + FreeLibrary( hnd ); + TracyLfqPrepare( QueueType::SymbolCodeMetadata ); + MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); + MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); + MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); + TracyLfqCommit; + break; + } + FreeLibrary( hnd ); + } + } + } #elif defined __linux__ void* data = m_kcore->Retrieve( si.ptr, si.extra ); if( data ) diff --git a/public/client/TracyProfiler.hpp b/public/client/TracyProfiler.hpp index 8d16905860..368889763c 100644 --- a/public/client/TracyProfiler.hpp +++ b/public/client/TracyProfiler.hpp @@ -1067,6 +1067,10 @@ class Profiler #if defined _WIN32 void* m_prevHandler; + #if defined __clang__ || defined __GNUC__ + int m_pipe[2]; + int m_pipeBufSize; + #endif #else int m_pipe[2]; int m_pipeBufSize; diff --git a/public/client/tracy_concurrentqueue.h b/public/client/tracy_concurrentqueue.h index 4178d39ead..693d3b2c6b 100644 --- a/public/client/tracy_concurrentqueue.h +++ b/public/client/tracy_concurrentqueue.h @@ -34,7 +34,7 @@ #include "../common/TracyForceInline.hpp" #include "../common/TracySystem.hpp" -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings // upon assigning any computed values) @@ -64,7 +64,7 @@ namespace tracy // Compiler-specific likely/unlikely 
hints namespace moodycamel { namespace details { -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) inline bool cqLikely(bool x) { return __builtin_expect((x), true); } inline bool cqUnlikely(bool x) { return __builtin_expect((x), false); } #else @@ -1436,6 +1436,6 @@ inline void swap(ConsumerToken& a, ConsumerToken& b) noexcept } /* namespace tracy */ -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif diff --git a/public/common/TracyForceInline.hpp b/public/common/TracyForceInline.hpp index b6a5833e58..3803e96e0b 100644 --- a/public/common/TracyForceInline.hpp +++ b/public/common/TracyForceInline.hpp @@ -1,7 +1,7 @@ #ifndef __TRACYFORCEINLINE_HPP__ #define __TRACYFORCEINLINE_HPP__ -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) # define tracy_force_inline __attribute__((always_inline)) inline #elif defined(_MSC_VER) # define tracy_force_inline __forceinline @@ -9,7 +9,7 @@ # define tracy_force_inline inline #endif -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) # define tracy_no_inline __attribute__((noinline)) #elif defined(_MSC_VER) # define tracy_no_inline __declspec(noinline) diff --git a/public/common/TracySocket.cpp b/public/common/TracySocket.cpp index bdba361965..6938fcff4a 100644 --- a/public/common/TracySocket.cpp +++ b/public/common/TracySocket.cpp @@ -22,6 +22,7 @@ # endif # define poll WSAPoll # ifdef _MSC_VER + // for gcc and clang added with linker options # pragma comment(lib, "ws2_32.lib") # endif #else diff --git a/public/common/tracy_lz4.cpp b/public/common/tracy_lz4.cpp index 15d0990f82..7e78da9ddf 100644 --- a/public/common/tracy_lz4.cpp +++ b/public/common/tracy_lz4.cpp @@ -74,11 +74,11 @@ * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ -# if defined(__GNUC__) && \ +# if (defined(__GNUC__) || defined(__clang__)) && \ ( defined(__ARM_ARCH_6__) || 
defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define LZ4_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || (defined(__GNUC__) || defined(__clang__)) # define LZ4_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -345,7 +345,9 @@ namespace tracy * environments. This is needed when decompressing the Linux Kernel, for example. */ #if !defined(LZ4_memcpy) -# if defined(__GNUC__) && (__GNUC__ >= 4) +# if defined(__clang__) +# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +# elif defined(__GNUC__) && (__GNUC__ >= 4) # define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) # else # define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) @@ -1283,7 +1285,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( } else { *op++ = (BYTE)(lastRun<= 4) +# if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) # define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) # else # define LZ4LIB_VISIBILITY diff --git a/public/common/tracy_lz4hc.cpp b/public/common/tracy_lz4hc.cpp index eec7239e05..bd0199353c 100644 --- a/public/common/tracy_lz4hc.cpp +++ b/public/common/tracy_lz4hc.cpp @@ -162,7 +162,10 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, #if defined(_MSC_VER) # define LZ4HC_rotl32(x,r) _rotl(x,r) +#elif defined(__clang__) +# define LZ4HC_rotl32(x,r) __builtin_rotateleft32(x,r) #else +// gcc does not provide builtin rotate left funciton for C++ (__builtin_stdc_rotate_left is available only in C) # define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) #endif diff --git a/server/TracyFileRead.hpp b/server/TracyFileRead.hpp index 988c7ae17e..10c62b06f1 100644 --- a/server/TracyFileRead.hpp +++ b/server/TracyFileRead.hpp @@ -15,7 +15,8 @@ #include #include -#ifdef _MSC_VER +#if defined _MSC_VER || defined 
__clang__ || defined __GNUC__ +// MSCV, gcc and clang compilers contain _stat64 # define stat64 _stat64 #endif #if defined __APPLE__ || defined __FreeBSD__ diff --git a/server/TracyPrint.cpp b/server/TracyPrint.cpp index 9111ddb0bd..95029f557e 100644 --- a/server/TracyPrint.cpp +++ b/server/TracyPrint.cpp @@ -1,7 +1,7 @@ #ifdef _MSC_VER # pragma warning( disable: 4244 ) // conversion from don't care to whatever, possible loss of data #endif -#ifdef __MINGW32__ +#if defined __MINGW32__ || defined __GNUC__ || defined __clang__ # define __STDC_FORMAT_MACROS #endif diff --git a/server/TracyPrint.hpp b/server/TracyPrint.hpp index d38245e359..ed9b62bb8a 100644 --- a/server/TracyPrint.hpp +++ b/server/TracyPrint.hpp @@ -16,7 +16,7 @@ # define NO_CHARCONV #endif -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ # define NO_CHARCONV #endif diff --git a/server/tracy_robin_hood.h b/server/tracy_robin_hood.h index 65f6fc1146..6dbcb9e82b 100644 --- a/server/tracy_robin_hood.h +++ b/server/tracy_robin_hood.h @@ -818,6 +818,7 @@ struct hash::value>::type> { } #if defined(__GNUC__) && !defined(__clang__) +// clang does not recognize -Wuseless-cast option # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wuseless-cast" #endif diff --git a/server/tracy_xxhash.h b/server/tracy_xxhash.h index a18e8c762d..0b0f8d57a9 100644 --- a/server/tracy_xxhash.h +++ b/server/tracy_xxhash.h @@ -260,7 +260,7 @@ extern "C" { # define XXH_STATIC_LINKING_ONLY /* make all functions private */ # undef XXH_PUBLIC_API -# if defined(__GNUC__) +# if defined(__GNUC__) || defined(__clang__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define XXH_PUBLIC_API static inline @@ -373,11 +373,19 @@ extern "C" { /*! @brief Marks a global symbol. 
*/ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# if defined(WIN32) && (defined(_MSC_VER) || defined(__clang__) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) +# if defined(__GNUC__) || defined(__clang__) +# __attribute__((dllexport)) +# elif +# define XXH_PUBLIC_API __declspec(dllexport) +# endif # elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) +# if defined(__GNUC__) || defined(__clang__) +# __attribute__((dllimport)) +# elif +# define XXH_PUBLIC_API __declspec(dllimport) +# endif # endif # else # define XXH_PUBLIC_API /* do nothing */ @@ -449,18 +457,26 @@ extern "C" { /* specific declaration modes for Windows */ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# if defined(WIN32) && (defined(_MSC_VER) || defined(__clang__) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) +# if defined(__GNUC__) || defined(__clang__) +# __attribute__((dllexport)) +# elif +# define XXH_PUBLIC_API __declspec(dllexport) +# endif # elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) +# if defined(__GNUC__) || defined(__clang__) +# __attribute__((dllimport)) +# elif +# define XXH_PUBLIC_API __declspec(dllimport) +# endif # endif # else # define XXH_PUBLIC_API /* do nothing */ # endif #endif -#if defined (__GNUC__) +#if defined (__GNUC__) || defined (__clang__) # define XXH_CONSTF __attribute__((const)) # define XXH_PUREF __attribute__((pure)) # define XXH_MALLOCF __attribute__((malloc)) @@ -1478,7 +1494,7 @@ struct XXH64_state_s { #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */ /* In C++ alignas() is a keyword */ # define XXH_ALIGN(n) alignas(n) -#elif 
defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) # define XXH_ALIGN(n) __attribute__ ((aligned(n))) #elif defined(_MSC_VER) # define XXH_ALIGN(n) __declspec(align(n)) @@ -1489,7 +1505,7 @@ struct XXH64_state_s { /* Old GCC versions only accept the attribute after the type in structures. */ #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \ - && defined(__GNUC__) + && (defined(__GNUC__) || defined(__clang__)) # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) #else # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type @@ -1983,7 +1999,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, /* prefer __packed__ structures (method 1) for GCC * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy * which for some reason does unaligned loads. */ -# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) +# if (defined(__GNUC__) || defined(__clang__)) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -2413,6 +2429,7 @@ static int XXH_isLittleEndian(void) */ #if XXH_HAS_BUILTIN(__builtin_unreachable) +// gcc and clang should have this as builtin # define XXH_UNREACHABLE() __builtin_unreachable() #elif defined(_MSC_VER) @@ -2443,13 +2460,14 @@ static int XXH_isLittleEndian(void) */ #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ && XXH_HAS_BUILTIN(__builtin_rotateleft64) -# define XXH_rotl32 __builtin_rotateleft32 -# define XXH_rotl64 __builtin_rotateleft64 +# define XXH_rotl32(x,r) __builtin_rotateleft32(x,r) +# define XXH_rotl64(x,r) __builtin_rotateleft64(x,r) /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ #elif defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) # define 
XXH_rotl64(x,r) _rotl64(x,r) #else +// gcc does not provide builtin rotate left funciton for C++ (__builtin_stdc_rotate_left is available only in C) # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) #endif @@ -2464,7 +2482,8 @@ static int XXH_isLittleEndian(void) */ #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap32 _byteswap_ulong -#elif XXH_GCC_VERSION >= 403 +#elif XXH_GCC_VERSION >= 403 || defined __clang__ +// XXH_GCC_VERSION >= 403 should be equivalent to if defined __GNUC__ # define XXH_swap32 __builtin_bswap32 #else static xxh_u32 XXH_swap32 (xxh_u32 x) @@ -3012,7 +3031,8 @@ static xxh_u64 XXH_read64(const void* memPtr) #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap64 _byteswap_uint64 -#elif XXH_GCC_VERSION >= 403 +#elif XXH_GCC_VERSION >= 403 || defined __clang__ +// XXH_GCC_VERSION >= 403 should be equivalent to if defined __GNUC__ # define XXH_swap64 __builtin_bswap64 #else static xxh_u64 XXH_swap64(xxh_u64 x) @@ -3953,7 +3973,7 @@ do { \ # elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# elif ( defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) ) || defined (__clang__) # define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) # else # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ diff --git a/test/stb_image.h b/test/stb_image.h index 5e807a0a6e..4c03e4a8d1 100644 --- a/test/stb_image.h +++ b/test/stb_image.h @@ -621,7 +621,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #ifndef STBI_NO_THREAD_LOCALS #if defined(__cplusplus) && 
__cplusplus >= 201103L #define STBI_THREAD_LOCAL thread_local - #elif defined(__GNUC__) && __GNUC__ < 5 + #elif defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 5 #define STBI_THREAD_LOCAL __thread #elif defined(_MSC_VER) #define STBI_THREAD_LOCAL __declspec(thread) @@ -630,7 +630,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #endif #ifndef STBI_THREAD_LOCAL - #if defined(__GNUC__) + #if defined(__GNUC__) && !defined(__clang__) #define STBI_THREAD_LOCAL __thread #endif #endif @@ -658,12 +658,15 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #define STBI_NOTUSED(v) (void)sizeof(v) #endif -#ifdef _MSC_VER -#define STBI_HAS_LROTL -#endif -#ifdef STBI_HAS_LROTL +#ifdef _MSC_VER #define stbi_lrot(x,y) _lrotl(x,y) +#elif defined __clang__ + // 32bit version of function as stb image uses this function to rotate 32bit integers + #define stbi_lrot(x,y) __builtin_rotateleft32(x,y) +#elif defined __GNUC__ + // gcc built-in is type-generic with first argument being any unsigned integer and second any signed or unsigned integer or char + #define stbi_lrot(x,y) __builtin_stdc_rotate_left(x,y) #else #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif @@ -725,7 +728,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; #ifdef _MSC_VER -#if _MSC_VER >= 1400 // not VC6 +#if defined (_MSC_VER && _MSC_VER >= 1400) // not VC6 #include // __cpuid static int stbi__cpuid3(void) { From e98001c4699fedc1fdd4d8bf257ec9ed0ee568f4 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 18:39:09 +0200 Subject: [PATCH 03/15] Minor bug fix --- cmake/config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index ad238cae4c..691a241fb9 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -45,7 +45,7 @@ endif() if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) # Mingw gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp - if(NOT (MINGW OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang"))) + if(NOT (MINGW OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) endif() endif() From ae1f7ca43ddfb5242fafe0b25b4e0b72741b09d1 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 22:43:50 +0200 Subject: [PATCH 04/15] Further tweaks and fixes --- cmake/config.cmake | 6 +- import/src/import-chrome.cpp | 2 +- import/src/import-fuchsia.cpp | 2 +- import/src/json.hpp | 18 +- profiler/src/main.cpp | 8 - profiler/src/profiler/TracyView_Timeline.cpp | 8 - profiler/src/stb_image.h | 353 +- public/TracyClient.cpp | 2 +- public/client/TracyProfiler.cpp | 10150 ++++++++--------- public/client/TracyProfiler.hpp | 2 +- public/client/tracy_concurrentqueue.h | 6 +- public/common/TracyForceInline.hpp | 4 +- public/common/tracy_lz4.cpp | 4 +- server/TracyFileRead.hpp | 2 +- server/TracyPopcnt.hpp | 2 +- server/TracyPrint.cpp | 2 
+- server/TracyPrint.hpp | 2 +- server/tracy_xxhash.h | 22 +- test/stb_image.h | 2 +- 19 files changed, 5290 insertions(+), 5307 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 691a241fb9..1bdb25905e 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -43,9 +43,11 @@ if(EMSCRIPTEN) add_compile_options(-pthread -DIMGUI_IMPL_OPENGL_ES2) endif() +message("compiler = ${CMAKE_C_COMPILER_ID}") + if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) - # Mingw gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp - if(NOT (MINGW OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) + # gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp + if(WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) endif() endif() diff --git a/import/src/import-chrome.cpp b/import/src/import-chrome.cpp index 5a3040c338..6f20d8de84 100644 --- a/import/src/import-chrome.cpp +++ b/import/src/import-chrome.cpp @@ -11,7 +11,7 @@ #include #include -#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +#if defined _MSC_VER || (defined _WIN32 && defined __GNUC__) // all checked compilers contain _stat64 # define stat64 _stat64 #endif diff --git a/import/src/import-fuchsia.cpp b/import/src/import-fuchsia.cpp index 854273b2cd..7c87211596 100644 --- a/import/src/import-fuchsia.cpp +++ b/import/src/import-fuchsia.cpp @@ -20,7 +20,7 @@ #include #include -#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +#if defined _MSC_VER || (defined _WIN32 && defined __GNUC__) // all checked compilers contain _stat64 # define stat64 _stat64 #endif diff --git a/import/src/json.hpp 
b/import/src/json.hpp index cca1a99925..f7aeee4c6d 100644 --- a/import/src/json.hpp +++ b/import/src/json.hpp @@ -16799,7 +16799,7 @@ class binary_writer void write_compact_float(const number_float_t n, detail::input_format_t format) { -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -16819,7 +16819,7 @@ class binary_writer : get_msgpack_float_prefix(n)); write_number(n); } -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic pop #endif } @@ -17981,7 +17981,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '-'; } -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -17993,7 +17993,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '0'; return first; } -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic pop #endif @@ -21187,7 +21187,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::negation>, detail::negation>, detail::negation>>, -#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || defined(__clang__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) +#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) detail::negation>, #endif #if defined(JSON_HAS_CPP_17) && JSON_HAS_STATIC_RTTI @@ -22983,13 +22983,13 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ bool operator==(const_reference rhs) const noexcept { -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif const_reference lhs = *this; JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#if 
defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic pop #endif } @@ -23087,12 +23087,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ friend bool operator==(const_reference lhs, const_reference rhs) noexcept { -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic pop #endif } diff --git a/profiler/src/main.cpp b/profiler/src/main.cpp index 0b6d34cae8..b10a668003 100644 --- a/profiler/src/main.cpp +++ b/profiler/src/main.cpp @@ -19,14 +19,6 @@ # include #endif -#if defined _WIN32 && (defined __GNUC__ && !defined __clang__) -//_WIN32 - only tested on windows - -// gcc throws error for not present std::pow function, -// clang does not have that problem -#include -#endif - #define STB_IMAGE_IMPLEMENTATION #define STBI_ONLY_PNG #include "stb_image.h" diff --git a/profiler/src/profiler/TracyView_Timeline.cpp b/profiler/src/profiler/TracyView_Timeline.cpp index 50186f6f68..5200fd2c6e 100644 --- a/profiler/src/profiler/TracyView_Timeline.cpp +++ b/profiler/src/profiler/TracyView_Timeline.cpp @@ -10,14 +10,6 @@ #include "TracyTimelineItemThread.hpp" #include "TracyView.hpp" -#if defined _WIN32 && (defined __GNUC__ && !defined __clang__) -//_WIN32 - only tested on windows - -// gcc throws error for not present std::pow function, -// clang does not have that problem -#include -#endif - namespace tracy { diff --git a/profiler/src/stb_image.h b/profiler/src/stb_image.h index d0c064bb36..c7041e6501 100644 --- a/profiler/src/stb_image.h +++ b/profiler/src/stb_image.h @@ -1,4 +1,4 @@ -/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.29 - public domain image loader - 
http://nothings.org/stb no warranty implied; use at your own risk Do this: @@ -48,6 +48,7 @@ LICENSE RECENT REVISION HISTORY: + 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes @@ -621,7 +622,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #ifndef STBI_NO_THREAD_LOCALS #if defined(__cplusplus) && __cplusplus >= 201103L #define STBI_THREAD_LOCAL thread_local - #elif defined(__GNUC__) && __GNUC__ < 5 + #elif defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__) #define STBI_THREAD_LOCAL __thread #elif defined(_MSC_VER) #define STBI_THREAD_LOCAL __declspec(thread) @@ -658,7 +659,6 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #define STBI_NOTUSED(v) (void)sizeof(v) #endif - #ifdef _MSC_VER #define stbi_lrot(x,y) _lrotl(x,y) #elif defined __clang__ @@ -728,7 +728,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #ifdef _MSC_VER -#if defined _MSC_VER && _MSC_VER >= 1400 // not VC6 +#if _MSC_VER >= 1400 // not VC6 #include // __cpuid static int stbi__cpuid3(void) { @@ -1075,8 +1075,8 @@ static int stbi__addints_valid(int a, int b) return a <= INT_MAX - b; } -// returns 1 if the product of two signed shorts is valid, 0 on overflow. -static int stbi__mul2shorts_valid(short a, short b) +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. 
+static int stbi__mul2shorts_valid(int a, int b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid @@ -3387,13 +3387,13 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) return 1; } -static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - while (x == 255) { // might be a marker + stbi_uc x = stbi__get8(j->s); + while (x == 0xff) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { @@ -4179,6 +4179,7 @@ typedef struct { stbi_uc *zbuffer, *zbuffer_end; int num_bits; + int hit_zeof_once; stbi__uint32 code_buffer; char *zout; @@ -4245,9 +4246,20 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) int b,s; if (a->num_bits < 16) { if (stbi__zeof(a)) { - return -1; /* report error for unexpected end of data. */ + if (!a->hit_zeof_once) { + // This is the first time we hit eof, insert 16 extra padding btis + // to allow us to keep going; if we actually consume any of them + // though, that is invalid data. This is caught later. + a->hit_zeof_once = 1; + a->num_bits += 16; // add 16 implicit zero bits + } else { + // We already inserted our extra 16 padding bits and are again + // out, this stream is actually prematurely terminated. 
+ return -1; + } + } else { + stbi__fill_bits(a); } - stbi__fill_bits(a); } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { @@ -4312,6 +4324,13 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) int len,dist; if (z == 256) { a->zout = zout; + if (a->hit_zeof_once && a->num_bits < 16) { + // The first time we hit zeof, we inserted 16 extra zero bits into our bit + // buffer so the decoder can just do its speculative decoding. But if we + // actually consumed any of those bits (which is the case when num_bits < 16), + // the stream actually read past the end so it is malformed. + return stbi__err("unexpected end","Corrupt PNG"); + } return 1; } if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data @@ -4323,7 +4342,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (zout + len > a->zout_end) { + if (len > a->zout_end - zout) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } @@ -4467,6 +4486,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; + a->hit_zeof_once = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); @@ -4622,9 +4642,8 @@ enum { STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, - // synthetic filters used for first scanline to avoid needing a dummy row of 0s - STBI__F_avg_first, - STBI__F_paeth_first + // synthetic filter used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first }; static stbi_uc first_row_filter[5] = @@ -4633,29 +4652,56 @@ static stbi_uc first_row_filter[5] = STBI__F_sub, STBI__F_none, STBI__F_avg_first, - STBI__F_paeth_first + STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub }; static int 
stbi__paeth(int a, int b, int c) { - int p = a + b - c; - int pa = abs(p-a); - int pb = abs(p-b); - int pc = abs(p-c); - if (pa <= pb && pa <= pc) return a; - if (pb <= pc) return b; - return c; + // This formulation looks very different from the reference in the PNG spec, but is + // actually equivalent and has favorable data dependencies and admits straightforward + // generation of branch-free code, which helps performance significantly. + int thresh = c*3 - (a + b); + int lo = a < b ? a : b; + int hi = a < b ? b : a; + int t0 = (hi <= thresh) ? lo : c; + int t1 = (thresh <= lo) ? hi : t0; + return t1; } static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) +{ + int i; + // must process data backwards since we allow dest==src + if (img_n == 1) { + for (i=x-1; i >= 0; --i) { + dest[i*2+1] = 255; + dest[i*2+0] = src[i]; + } + } else { + STBI_ASSERT(img_n == 3); + for (i=x-1; i >= 0; --i) { + dest[i*4+3] = 255; + dest[i*4+2] = src[i*3+2]; + dest[i*4+1] = src[i*3+1]; + dest[i*4+0] = src[i*3+0]; + } + } +} + // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) { - int bytes = (depth == 16? 2 : 1); + int bytes = (depth == 16 ? 
2 : 1); stbi__context *s = a->s; stbi__uint32 i,j,stride = x*out_n*bytes; stbi__uint32 img_len, img_width_bytes; + stbi_uc *filter_buf; + int all_ok = 1; int k; int img_n = s->img_n; // copy it into a local for later @@ -4667,8 +4713,11 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into if (!a->out) return stbi__err("outofmem", "Out of memory"); + // note: error exits here don't need to clean up a->out individually, + // stbi__do_png always does on error. if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); + if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); img_len = (img_width_bytes + 1) * y; // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, @@ -4676,189 +4725,137 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r // so just check for raw_len < img_len always. if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + // Allocate two scan lines worth of filter workspace buffer. 
+ filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); + if (!filter_buf) return stbi__err("outofmem", "Out of memory"); + + // Filtering for low-bit-depth images + if (depth < 8) { + filter_bytes = 1; + width = img_width_bytes; + } + for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *prior; + // cur/prior filter buffers alternate + stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; + stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; + stbi_uc *dest = a->out + stride*j; + int nk = width * filter_bytes; int filter = *raw++; - if (filter > 4) - return stbi__err("invalid filter","Corrupt PNG"); - - if (depth < 8) { - if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); - cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place - filter_bytes = 1; - width = img_width_bytes; + // check filter type + if (filter > 4) { + all_ok = stbi__err("invalid filter","Corrupt PNG"); + break; } - prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; - // handle first byte explicitly - for (k=0; k < filter_bytes; ++k) { - switch (filter) { - case STBI__F_none : cur[k] = raw[k]; break; - case STBI__F_sub : cur[k] = raw[k]; break; - case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; - case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; - case STBI__F_avg_first : cur[k] = raw[k]; break; - case STBI__F_paeth_first: cur[k] = raw[k]; break; - } - } - - if (depth == 8) { - if (img_n != out_n) - cur[img_n] = 255; // first pixel - raw += img_n; - cur += out_n; - prior += out_n; - } else if (depth == 16) { - if (img_n != out_n) { - cur[filter_bytes] = 255; // first pixel top byte - cur[filter_bytes+1] = 
255; // first pixel bottom byte - } - raw += filter_bytes; - cur += output_bytes; - prior += output_bytes; - } else { - raw += 1; - cur += 1; - prior += 1; + // perform actual filtering + switch (filter) { + case STBI__F_none: + memcpy(cur, raw, nk); + break; + case STBI__F_sub: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); + break; + case STBI__F_up: + for (k = 0; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); + break; + case STBI__F_avg: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); + break; + case STBI__F_paeth: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); + break; + case STBI__F_avg_first: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); + break; } - // this is a little gross, so that we don't switch per-pixel or per-component - if (depth < 8 || img_n == out_n) { - int nk = (width - 1)*filter_bytes; - #define STBI__CASE(f) \ - case f: \ - for (k=0; k < nk; ++k) - switch (filter) { - // "none" filter turns into a memcpy here; make that explicit. 
- case STBI__F_none: memcpy(cur, raw, nk); break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; - } - #undef STBI__CASE - raw += nk; - } else { - STBI_ASSERT(img_n+1 == out_n); - #define STBI__CASE(f) \ - case f: \ - for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ - for (k=0; k < filter_bytes; ++k) - switch (filter) { - STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; - } - #undef STBI__CASE - - // the loop above sets the high byte of the pixels' alpha, but for - // 16 bit png files we also need the low byte set. we'll do that here. 
- if (depth == 16) { - cur = a->out + stride*j; // start at the beginning of the row again - for (i=0; i < x; ++i,cur+=output_bytes) { - cur[filter_bytes+1] = 255; - } - } - } - } + raw += nk; - // we make a separate pass to expand bits to pixels; for performance, - // this could run two scanlines behind the above code, so it won't - // intefere with filtering but will still be in the cache. - if (depth < 8) { - for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; - // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit - // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + // expand decoded bits in cur to dest, also adding an extra alpha channel if desired + if (depth < 8) { stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + stbi_uc *in = cur; + stbi_uc *out = dest; + stbi_uc inb = 0; + stbi__uint32 nsmp = x*img_n; - // note that the final byte might overshoot and write more data than desired. - // we can allocate enough data that this never writes out of memory, but it - // could also overwrite the next scanline. can it overwrite non-empty data - // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
- // so we need to explicitly clamp the final ones - + // expand bits to bytes first if (depth == 4) { - for (k=x*img_n; k >= 2; k-=2, ++in) { - *cur++ = scale * ((*in >> 4) ); - *cur++ = scale * ((*in ) & 0x0f); + for (i=0; i < nsmp; ++i) { + if ((i & 1) == 0) inb = *in++; + *out++ = scale * (inb >> 4); + inb <<= 4; } - if (k > 0) *cur++ = scale * ((*in >> 4) ); } else if (depth == 2) { - for (k=x*img_n; k >= 4; k-=4, ++in) { - *cur++ = scale * ((*in >> 6) ); - *cur++ = scale * ((*in >> 4) & 0x03); - *cur++ = scale * ((*in >> 2) & 0x03); - *cur++ = scale * ((*in ) & 0x03); + for (i=0; i < nsmp; ++i) { + if ((i & 3) == 0) inb = *in++; + *out++ = scale * (inb >> 6); + inb <<= 2; } - if (k > 0) *cur++ = scale * ((*in >> 6) ); - if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); - if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); - } else if (depth == 1) { - for (k=x*img_n; k >= 8; k-=8, ++in) { - *cur++ = scale * ((*in >> 7) ); - *cur++ = scale * ((*in >> 6) & 0x01); - *cur++ = scale * ((*in >> 5) & 0x01); - *cur++ = scale * ((*in >> 4) & 0x01); - *cur++ = scale * ((*in >> 3) & 0x01); - *cur++ = scale * ((*in >> 2) & 0x01); - *cur++ = scale * ((*in >> 1) & 0x01); - *cur++ = scale * ((*in ) & 0x01); + } else { + STBI_ASSERT(depth == 1); + for (i=0; i < nsmp; ++i) { + if ((i & 7) == 0) inb = *in++; + *out++ = scale * (inb >> 7); + inb <<= 1; } - if (k > 0) *cur++ = scale * ((*in >> 7) ); - if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); - if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); - if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); - if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); - if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); - if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } - if (img_n != out_n) { - int q; - // insert alpha = 255 - cur = a->out + stride*j; + + // insert alpha=255 values if desired + if (img_n != out_n) + stbi__create_png_alpha_expand8(dest, dest, x, img_n); + } else if (depth == 8) { + if (img_n == out_n) + memcpy(dest, cur, x*img_n); + 
else + stbi__create_png_alpha_expand8(dest, cur, x, img_n); + } else if (depth == 16) { + // convert the image data from big-endian to platform-native + stbi__uint16 *dest16 = (stbi__uint16*)dest; + stbi__uint32 nsmp = x*img_n; + + if (img_n == out_n) { + for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) + *dest16 = (cur[0] << 8) | cur[1]; + } else { + STBI_ASSERT(img_n+1 == out_n); if (img_n == 1) { - for (q=x-1; q >= 0; --q) { - cur[q*2+1] = 255; - cur[q*2+0] = cur[q]; + for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = 0xffff; } } else { STBI_ASSERT(img_n == 3); - for (q=x-1; q >= 0; --q) { - cur[q*4+3] = 255; - cur[q*4+2] = cur[q*3+2]; - cur[q*4+1] = cur[q*3+1]; - cur[q*4+0] = cur[q*3+0]; + for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = (cur[2] << 8) | cur[3]; + dest16[2] = (cur[4] << 8) | cur[5]; + dest16[3] = 0xffff; } } } } - } else if (depth == 16) { - // force the image data from big-endian to platform-native. - // this is done in a separate pass due to the decoding relying - // on the data being untouched, but could probably be done - // per-line during decode if care is taken. 
- stbi_uc *cur = a->out; - stbi__uint16 *cur16 = (stbi__uint16*)cur; - - for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { - *cur16 = (cur[0] << 8) | cur[1]; - } } + STBI_FREE(filter_buf); + if (!all_ok) return 0; + return 1; } diff --git a/public/TracyClient.cpp b/public/TracyClient.cpp index 69a5a6df70..e50f1abaa3 100644 --- a/public/TracyClient.cpp +++ b/public/TracyClient.cpp @@ -51,7 +51,7 @@ #endif #ifdef _MSC_VER -// when gcc and clang linker options will be used +// for gcc and clang added with linker options # pragma comment(lib, "ws2_32.lib") # pragma comment(lib, "dbghelp.lib") # pragma comment(lib, "advapi32.lib") diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 359b1f478d..7bb541b3cc 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -1,5075 +1,5075 @@ -#ifdef TRACY_ENABLE - -#ifdef _WIN32 -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include -# include -# include -# include -# include -# include "../common/TracyUwp.hpp" -# if defined __clang__ || defined __GNUC__ -# include -# endif -#else -# include -# include -#endif - -#ifdef _GNU_SOURCE -# include -#endif - -#ifdef __linux__ -# include -# include -# include -# include -#endif - -#if defined __APPLE__ || defined BSD -# include -# include -#endif - -#if defined __APPLE__ -# include "TargetConditionals.h" -# include -#endif - -#ifdef __ANDROID__ -# include -# include -# include -# include -# include -# include -#endif - -#ifdef __QNX__ -# include -# include -# include -# include -# include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../common/TracyAlign.hpp" -#include "../common/TracyAlloc.hpp" -#include "../common/TracySocket.hpp" -#include "../common/TracySystem.hpp" -#include "../common/TracyYield.hpp" -#include "../common/tracy_lz4.hpp" -#include "tracy_rpmalloc.hpp" -#include "TracyCallstack.hpp" -#include "TracyDebug.hpp" -#include "TracyDxt1.hpp" 
-#include "TracyScoped.hpp" -#include "TracyProfiler.hpp" -#include "TracyThread.hpp" -#include "TracyArmCpuTable.hpp" -#include "TracySysTrace.hpp" -#include "../tracy/TracyC.h" - -#if defined TRACY_MANUAL_LIFETIME && !defined(TRACY_DELAYED_INIT) -# error "TRACY_MANUAL_LIFETIME requires enabled TRACY_DELAYED_INIT" -#endif - -#ifdef TRACY_PORT -# ifndef TRACY_DATA_PORT -# define TRACY_DATA_PORT TRACY_PORT -# endif -# ifndef TRACY_BROADCAST_PORT -# define TRACY_BROADCAST_PORT TRACY_PORT -# endif -#endif - -#ifdef __APPLE__ -# ifndef TRACY_DELAYED_INIT -# define TRACY_DELAYED_INIT -# endif -#else -# if defined __GNUC__ || defined __clang__ -# define init_order( val ) __attribute__ ((init_priority(val))) -# else -# define init_order(x) -# endif -#endif - -#if defined _WIN32 -# include -extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); -extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); -extern "C" typedef char* (WINAPI *t_WineGetVersion)(); -extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); - -# if defined __clang__ || defined __GNUC__ - // _WIN32 -# include -#endif - -#else -# include -# include -# include -#endif -#if defined __linux__ -# include -# include -#endif - -#if !defined _WIN32 && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) -# include "TracyCpuid.hpp" -#endif - -#if !( ( defined _WIN32 && _WIN32_WINNT >= _WIN32_WINNT_VISTA ) || defined __linux__ ) -# include -#endif - -#ifdef __QNX__ -extern char* __progname; -#endif - -namespace tracy -{ - -#ifdef __ANDROID__ -// Implementation helpers of EnsureReadable(address). -// This is so far only needed on Android, where it is common for libraries to be mapped -// with only executable, not readable, permissions. 
Typical example (line from /proc/self/maps): -/* -746b63b000-746b6dc000 --xp 00042000 07:48 35 /apex/com.android.runtime/lib64/bionic/libc.so -*/ -// See https://github.com/wolfpld/tracy/issues/125 . -// To work around this, we parse /proc/self/maps and we use mprotect to set read permissions -// on any mappings that contain symbols addresses hit by HandleSymbolCodeQuery. - -namespace { -// Holds some information about a single memory mapping. -struct MappingInfo { - // Start of address range. Inclusive. - uintptr_t start_address; - // End of address range. Exclusive, so the mapping is the half-open interval - // [start, end) and its length in bytes is `end - start`. As in /proc/self/maps. - uintptr_t end_address; - // Read/Write/Executable permissions. - bool perm_r, perm_w, perm_x; -}; -} // anonymous namespace - - // Internal implementation helper for LookUpMapping(address). - // - // Parses /proc/self/maps returning a vector. - // /proc/self/maps is assumed to be sorted by ascending address, so the resulting - // vector is sorted by ascending address too. 
-static std::vector ParseMappings() -{ - std::vector result; - FILE* file = fopen( "/proc/self/maps", "r" ); - if( !file ) return result; - char line[1024]; - while( fgets( line, sizeof( line ), file ) ) - { - uintptr_t start_addr; - uintptr_t end_addr; -#if defined(__LP64__) - if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue; -#else - if (sscanf( line, "%dx-%dx", &start_addr, &end_addr ) != 2 ) continue; -#endif - char* first_space = strchr( line, ' ' ); - if( !first_space ) continue; - char* perm = first_space + 1; - char* second_space = strchr( perm, ' ' ); - if( !second_space || second_space - perm != 4 ) continue; - result.emplace_back(); - auto& mapping = result.back(); - mapping.start_address = start_addr; - mapping.end_address = end_addr; - mapping.perm_r = perm[0] == 'r'; - mapping.perm_w = perm[1] == 'w'; - mapping.perm_x = perm[2] == 'x'; - } - fclose( file ); - return result; -} - -// Internal implementation helper for LookUpMapping(address). -// -// Takes as input an `address` and a known vector `mappings`, assumed to be -// sorted by increasing addresses, as /proc/self/maps seems to be. -// Returns a pointer to the MappingInfo describing the mapping that this -// address belongs to, or nullptr if the address isn't in `mappings`. -static MappingInfo* LookUpMapping(std::vector& mappings, uintptr_t address) -{ - // Comparison function for std::lower_bound. Returns true if all addresses in `m1` - // are lower than `addr`. - auto Compare = []( const MappingInfo& m1, uintptr_t addr ) { - // '<=' because the address ranges are half-open intervals, [start, end). - return m1.end_address <= addr; - }; - auto iter = std::lower_bound( mappings.begin(), mappings.end(), address, Compare ); - if( iter == mappings.end() || iter->start_address > address) { - return nullptr; - } - return &*iter; -} - -// Internal implementation helper for EnsureReadable(address). 
-// -// Takes as input an `address` and returns a pointer to a MappingInfo -// describing the mapping that this address belongs to, or nullptr if -// the address isn't in any known mapping. -// -// This function is stateful and not reentrant (assumes to be called from -// only one thread). It holds a vector of mappings parsed from /proc/self/maps. -// -// Attempts to react to mappings changes by re-parsing /proc/self/maps. -static MappingInfo* LookUpMapping(uintptr_t address) -{ - // Static state managed by this function. Not constant, we mutate that state as - // we turn some mappings readable. Initially parsed once here, updated as needed below. - static std::vector s_mappings = ParseMappings(); - MappingInfo* mapping = LookUpMapping( s_mappings, address ); - if( mapping ) return mapping; - - // This address isn't in any known mapping. Try parsing again, maybe - // mappings changed. - s_mappings = ParseMappings(); - return LookUpMapping( s_mappings, address ); -} - -// Internal implementation helper for EnsureReadable(address). -// -// Attempts to make the specified `mapping` readable if it isn't already. -// Returns true if and only if the mapping is readable. -static bool EnsureReadable( MappingInfo& mapping ) -{ - if( mapping.perm_r ) - { - // The mapping is already readable. - return true; - } - int prot = PROT_READ; - if( mapping.perm_w ) prot |= PROT_WRITE; - if( mapping.perm_x ) prot |= PROT_EXEC; - if( mprotect( reinterpret_cast( mapping.start_address ), - mapping.end_address - mapping.start_address, prot ) == -1 ) - { - // Failed to make the mapping readable. Shouldn't happen, hasn't - // been observed yet. If it happened in practice, we should consider - // adding a bool to MappingInfo to track this to avoid retrying mprotect - // everytime on such mappings. - return false; - } - // The mapping is now readable. Update `mapping` so the next call will be fast. 
- mapping.perm_r = true; - return true; -} - -// Attempts to set the read permission on the entire mapping containing the -// specified address. Returns true if and only if the mapping is now readable. -static bool EnsureReadable( uintptr_t address ) -{ - MappingInfo* mapping = LookUpMapping(address); - return mapping && EnsureReadable( *mapping ); -} -#elif defined WIN32 -static bool EnsureReadable( uintptr_t address ) -{ - MEMORY_BASIC_INFORMATION memInfo; - VirtualQuery( reinterpret_cast( address ), &memInfo, sizeof( memInfo ) ); - return memInfo.Protect != PAGE_NOACCESS; -} -#else -static bool EnsureReadable( uintptr_t address ) -{ - return true; -} -#endif - -#if defined __linux__ - bool -#endif - -#ifndef TRACY_DELAYED_INIT - -struct InitTimeWrapper -{ - int64_t val; -}; - -struct ProducerWrapper -{ - tracy::moodycamel::ConcurrentQueue::ExplicitProducer* ptr; -}; - -struct ThreadHandleWrapper -{ - uint32_t val; -}; -#endif - - -#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 -static inline void CpuId( uint32_t* regs, uint32_t leaf ) -{ - memset(regs, 0, sizeof(uint32_t) * 4); -#if defined _MSC_VER - __cpuidex( (int*)regs, leaf, 0 ); -#else - __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 ); -#endif -} - -static void InitFailure( const char* msg ) -{ -#if defined _WIN32 - bool hasConsole = false; - bool reopen = false; - const auto attached = AttachConsole( ATTACH_PARENT_PROCESS ); - if( attached ) - { - hasConsole = true; - reopen = true; - } - else - { - const auto err = GetLastError(); - if( err == ERROR_ACCESS_DENIED ) - { - hasConsole = true; - } - } - if( hasConsole ) - { - fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); - if( reopen ) - { - freopen( "CONOUT$", "w", stderr ); - fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); - } - } - else - { -# ifndef TRACY_UWP - MessageBoxA( nullptr, msg, "Tracy Profiler initialization failure", MB_ICONSTOP ); -# endif - } -#else - fprintf( 
stderr, "Tracy Profiler initialization failure: %s\n", msg ); -#endif - exit( 1 ); -} - -static bool CheckHardwareSupportsInvariantTSC() -{ - const char* noCheck = GetEnvVar( "TRACY_NO_INVARIANT_CHECK" ); - if( noCheck && noCheck[0] == '1' ) return true; - - uint32_t regs[4]; - CpuId( regs, 1 ); - if( !( regs[3] & ( 1 << 4 ) ) ) - { -#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK - InitFailure( "CPU doesn't support RDTSC instruction." ); -#else - return false; -#endif - } - CpuId( regs, 0x80000007 ); - if( regs[3] & ( 1 << 8 ) ) return true; - - return false; -} - -#if defined TRACY_TIMER_FALLBACK && defined TRACY_HW_TIMER -bool HardwareSupportsInvariantTSC() -{ - static bool cachedResult = CheckHardwareSupportsInvariantTSC(); - return cachedResult; -} -#endif - -static int64_t SetupHwTimer() -{ -#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK - if( !CheckHardwareSupportsInvariantTSC() ) - { -#if defined _WIN32 - InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC or TRACY_TIMER_FALLBACK define to use lower resolution timer." ); -#else - InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_FALLBACK define to use lower resolution timer." 
); -#endif - } -#endif - - return Profiler::GetTime(); -} -#else -static int64_t SetupHwTimer() -{ - return Profiler::GetTime(); -} -#endif - -static const char* GetProcessName() -{ - const char* processName = "unknown"; -#ifdef _WIN32 - static char buf[_MAX_PATH]; - GetModuleFileNameA( nullptr, buf, _MAX_PATH ); - const char* ptr = buf; - while( *ptr != '\0' ) ptr++; - while( ptr > buf && *ptr != '\\' && *ptr != '/' ) ptr--; - if( ptr > buf ) ptr++; - processName = ptr; -#elif defined __ANDROID__ -# if __ANDROID_API__ >= 21 - auto buf = getprogname(); - if( buf ) processName = buf; -# endif -#elif defined __linux__ && defined _GNU_SOURCE - if( program_invocation_short_name ) processName = program_invocation_short_name; -#elif defined __APPLE__ || defined BSD - auto buf = getprogname(); - if( buf ) processName = buf; -#elif defined __QNX__ - processName = __progname; -#endif - return processName; -} - -static const char* GetProcessExecutablePath() -{ -#ifdef _WIN32 - static char buf[_MAX_PATH]; - GetModuleFileNameA( nullptr, buf, _MAX_PATH ); - return buf; -#elif defined __ANDROID__ - return nullptr; -#elif defined __linux__ && defined _GNU_SOURCE - return program_invocation_name; -#elif defined __APPLE__ - static char buf[1024]; - uint32_t size = 1024; - _NSGetExecutablePath( buf, &size ); - return buf; -#elif defined __DragonFly__ - static char buf[1024]; - readlink( "/proc/curproc/file", buf, 1024 ); - return buf; -#elif defined __FreeBSD__ - static char buf[1024]; - int mib[4]; - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PATHNAME; - mib[3] = -1; - size_t cb = 1024; - sysctl( mib, 4, buf, &cb, nullptr, 0 ); - return buf; -#elif defined __NetBSD__ - static char buf[1024]; - readlink( "/proc/curproc/exe", buf, 1024 ); - return buf; -#elif defined __QNX__ - static char buf[_PC_PATH_MAX + 1]; - _cmdname(buf); - return buf; -#else - return nullptr; -#endif -} - -#if defined __linux__ && defined __ARM_ARCH -static uint32_t GetHex( char*& ptr, int 
skip ) -{ - uint32_t ret; - ptr += skip; - char* end; - if( ptr[0] == '0' && ptr[1] == 'x' ) - { - ptr += 2; - ret = strtol( ptr, &end, 16 ); - } - else - { - ret = strtol( ptr, &end, 10 ); - } - ptr = end; - return ret; -} -#endif - -static const char* GetHostInfo() -{ - static char buf[1024]; - auto ptr = buf; -#if defined _WIN32 -# ifdef TRACY_UWP - auto GetVersion = &::GetVersionEx; -# else - auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" ); -# endif - if( !GetVersion ) - { -# ifdef __MINGW32__ - ptr += sprintf( ptr, "OS: Windows (MingW)\n" ); -# else - ptr += sprintf( ptr, "OS: Windows\n" ); -# endif - } - else - { - RTL_OSVERSIONINFOW ver = { sizeof( RTL_OSVERSIONINFOW ) }; - GetVersion( &ver ); - -# ifdef __MINGW32__ - ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); -# else - auto WineGetVersion = (t_WineGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_version" ); - auto WineGetBuildId = (t_WineGetBuildId)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_build_id" ); - if( WineGetVersion && WineGetBuildId ) - { - ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu (Wine %s [%s])\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber, WineGetVersion(), WineGetBuildId() ); - } - else - { - ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); - } -# endif - } -#elif defined __linux__ - struct utsname utsName; - uname( &utsName ); -# if defined __ANDROID__ - ptr += sprintf( ptr, "OS: Linux %s (Android)\n", utsName.release ); -# else - ptr += sprintf( ptr, "OS: Linux %s\n", utsName.release ); -# endif -#elif defined __APPLE__ -# if TARGET_OS_IPHONE == 1 - ptr += sprintf( ptr, "OS: Darwin (iOS)\n" ); -# elif TARGET_OS_MAC == 1 - ptr += sprintf( ptr, "OS: Darwin (OSX)\n" ); -# else - ptr += sprintf( ptr, "OS: Darwin (unknown)\n" ); -# endif 
-#elif defined __DragonFly__ - ptr += sprintf( ptr, "OS: BSD (DragonFly)\n" ); -#elif defined __FreeBSD__ - ptr += sprintf( ptr, "OS: BSD (FreeBSD)\n" ); -#elif defined __NetBSD__ - ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" ); -#elif defined __OpenBSD__ - ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" ); -#elif defined __QNX__ - ptr += sprintf( ptr, "OS: QNX\n" ); -#else - ptr += sprintf( ptr, "OS: unknown\n" ); -#endif - -#if defined _MSC_VER -# if defined __clang__ - ptr += sprintf( ptr, "Compiler: MSVC clang-cl %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); -# else - ptr += sprintf( ptr, "Compiler: MSVC %i\n", _MSC_VER ); -# endif -#elif defined __clang__ - ptr += sprintf( ptr, "Compiler: clang %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); -#elif defined __GNUC__ - ptr += sprintf( ptr, "Compiler: gcc %i.%i.%i\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ ); -#else - ptr += sprintf( ptr, "Compiler: unknown\n" ); -#endif - -#if defined _WIN32 - InitWinSock(); - - char hostname[512]; - gethostname( hostname, 512 ); - -# ifdef TRACY_UWP - const char* user = ""; -# else - DWORD userSz = UNLEN+1; - char user[UNLEN+1]; - GetUserNameA( user, &userSz ); -# endif - - ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); -#else - char hostname[_POSIX_HOST_NAME_MAX]{}; - char user[_POSIX_LOGIN_NAME_MAX]{}; - - gethostname( hostname, _POSIX_HOST_NAME_MAX ); -# if defined __ANDROID__ - const auto login = getlogin(); - if( login ) - { - strcpy( user, login ); - } - else - { - memcpy( user, "(?)", 4 ); - } -# else - getlogin_r( user, _POSIX_LOGIN_NAME_MAX ); -# endif - - ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); -#endif - -#if defined __i386 || defined _M_IX86 - ptr += sprintf( ptr, "Arch: x86\n" ); -#elif defined __x86_64__ || defined _M_X64 - ptr += sprintf( ptr, "Arch: x64\n" ); -#elif defined __aarch64__ - ptr += sprintf( ptr, "Arch: ARM64\n" ); -#elif defined __ARM_ARCH - ptr += sprintf( ptr, "Arch: ARM\n" ); 
-#else - ptr += sprintf( ptr, "Arch: unknown\n" ); -#endif - -#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 - uint32_t regs[4]; - char cpuModel[4*4*3+1] = {}; - auto modelPtr = cpuModel; - for( uint32_t i=0x80000002; i<0x80000005; ++i ) - { - CpuId( regs, i ); - memcpy( modelPtr, regs, sizeof( regs ) ); modelPtr += sizeof( regs ); - } - - ptr += sprintf( ptr, "CPU: %s\n", cpuModel ); -#elif defined __linux__ && defined __ARM_ARCH - bool cpuFound = false; - FILE* fcpuinfo = fopen( "/proc/cpuinfo", "rb" ); - if( fcpuinfo ) - { - enum { BufSize = 4*1024 }; - char buf[BufSize]; - const auto sz = fread( buf, 1, BufSize, fcpuinfo ); - fclose( fcpuinfo ); - const auto end = buf + sz; - auto cptr = buf; - - uint32_t impl = 0; - uint32_t var = 0; - uint32_t part = 0; - uint32_t rev = 0; - - while( end - cptr > 20 ) - { - while( end - cptr > 20 && memcmp( cptr, "CPU ", 4 ) != 0 ) - { - cptr += 4; - while( end - cptr > 20 && *cptr != '\n' ) cptr++; - cptr++; - } - if( end - cptr <= 20 ) break; - cptr += 4; - if( memcmp( cptr, "implementer\t: ", 14 ) == 0 ) - { - if( impl != 0 ) break; - impl = GetHex( cptr, 14 ); - } - else if( memcmp( cptr, "variant\t: ", 10 ) == 0 ) var = GetHex( cptr, 10 ); - else if( memcmp( cptr, "part\t: ", 7 ) == 0 ) part = GetHex( cptr, 7 ); - else if( memcmp( cptr, "revision\t: ", 11 ) == 0 ) rev = GetHex( cptr, 11 ); - while( *cptr != '\n' && *cptr != '\0' ) cptr++; - cptr++; - } - - if( impl != 0 || var != 0 || part != 0 || rev != 0 ) - { - cpuFound = true; - ptr += sprintf( ptr, "CPU: %s%s r%ip%i\n", DecodeArmImplementer( impl ), DecodeArmPart( impl, part ), var, rev ); - } - } - if( !cpuFound ) - { - ptr += sprintf( ptr, "CPU: unknown\n" ); - } -#elif defined __APPLE__ && TARGET_OS_IPHONE == 1 - { - size_t sz; - sysctlbyname( "hw.machine", nullptr, &sz, nullptr, 0 ); - auto str = (char*)tracy_malloc( sz ); - sysctlbyname( "hw.machine", str, &sz, nullptr, 0 ); - ptr += sprintf( ptr, "Device: %s\n", DecodeIosDevice( 
str ) ); - tracy_free( str ); - } -#else - ptr += sprintf( ptr, "CPU: unknown\n" ); -#endif -#ifdef __ANDROID__ - char deviceModel[PROP_VALUE_MAX+1]; - char deviceManufacturer[PROP_VALUE_MAX+1]; - __system_property_get( "ro.product.model", deviceModel ); - __system_property_get( "ro.product.manufacturer", deviceManufacturer ); - ptr += sprintf( ptr, "Device: %s %s\n", deviceManufacturer, deviceModel ); -#endif - - ptr += sprintf( ptr, "CPU cores: %i\n", std::thread::hardware_concurrency() ); - -#if defined _WIN32 - MEMORYSTATUSEX statex; - statex.dwLength = sizeof( statex ); - GlobalMemoryStatusEx( &statex ); -# ifdef _MSC_VER - ptr += sprintf( ptr, "RAM: %I64u MB\n", statex.ullTotalPhys / 1024 / 1024 ); -# else - ptr += sprintf( ptr, "RAM: %llu MB\n", statex.ullTotalPhys / 1024 / 1024 ); -# endif -#elif defined __linux__ - struct sysinfo sysInfo; - sysinfo( &sysInfo ); - ptr += sprintf( ptr, "RAM: %lu MB\n", sysInfo.totalram / 1024 / 1024 ); -#elif defined __APPLE__ - size_t memSize; - size_t sz = sizeof( memSize ); - sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 ); - ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); -#elif defined BSD - size_t memSize; - size_t sz = sizeof( memSize ); - sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ); - ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); -#elif defined __QNX__ - struct asinfo_entry *entries = SYSPAGE_ENTRY(asinfo); - size_t count = SYSPAGE_ENTRY_SIZE(asinfo) / sizeof(struct asinfo_entry); - char *strings = SYSPAGE_ENTRY(strings)->data; - - uint64_t memSize = 0; - size_t i; - for (i = 0; i < count; i++) { - struct asinfo_entry *entry = &entries[i]; - if (strcmp(strings + entry->name, "ram") == 0) { - memSize += entry->end - entry->start + 1; - } - } - memSize = memSize / 1024 / 1024; - ptr += sprintf( ptr, "RAM: %llu MB\n", memSize); -#else - ptr += sprintf( ptr, "RAM: unknown\n" ); -#endif - - return buf; -} - -static uint64_t GetPid() -{ -#if defined _WIN32 - return 
uint64_t( GetCurrentProcessId() ); -#else - return uint64_t( getpid() ); -#endif -} - -void Profiler::AckServerQuery() -{ - QueueItem item; - MemWrite( &item.hdr.type, QueueType::AckServerQueryNoop ); - NeedDataSize( QueueDataSize[(int)QueueType::AckServerQueryNoop] ); - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckServerQueryNoop] ); -} - -void Profiler::AckSymbolCodeNotAvailable() -{ - QueueItem item; - MemWrite( &item.hdr.type, QueueType::AckSymbolCodeNotAvailable ); - NeedDataSize( QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); -} - -static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, int& len, int port ) -{ - static BroadcastMessage msg; - - msg.broadcastVersion = BroadcastVersion; - msg.protocolVersion = ProtocolVersion; - msg.listenPort = port; - msg.pid = GetPid(); - - memcpy( msg.programName, procname, pnsz ); - memset( msg.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); - - len = int( offsetof( BroadcastMessage, programName ) + pnsz + 1 ); - return msg; -} - -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER -static DWORD s_profilerThreadId = 0; -static DWORD s_symbolThreadId = 0; -static char s_crashText[1024]; - -LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) -{ - if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH; - - const unsigned ec = pExp->ExceptionRecord->ExceptionCode; - auto msgPtr = s_crashText; - switch( ec ) - { - case EXCEPTION_ACCESS_VIOLATION: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ACCESS_VIOLATION (0x%x). 
", ec ); - switch( pExp->ExceptionRecord->ExceptionInformation[0] ) - { - case 0: - msgPtr += sprintf( msgPtr, "Read violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); - break; - case 1: - msgPtr += sprintf( msgPtr, "Write violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); - break; - case 8: - msgPtr += sprintf( msgPtr, "DEP violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); - break; - default: - break; - } - break; - case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0x%x). ", ec ); - break; - case EXCEPTION_DATATYPE_MISALIGNMENT: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_DATATYPE_MISALIGNMENT (0x%x). ", ec ); - break; - case EXCEPTION_FLT_DIVIDE_BY_ZERO: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO (0x%x). ", ec ); - break; - case EXCEPTION_ILLEGAL_INSTRUCTION: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ILLEGAL_INSTRUCTION (0x%x). ", ec ); - break; - case EXCEPTION_IN_PAGE_ERROR: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_IN_PAGE_ERROR (0x%x). ", ec ); - break; - case EXCEPTION_INT_DIVIDE_BY_ZERO: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO (0x%x). ", ec ); - break; - case EXCEPTION_PRIV_INSTRUCTION: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_PRIV_INSTRUCTION (0x%x). ", ec ); - break; - case EXCEPTION_STACK_OVERFLOW: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_STACK_OVERFLOW (0x%x). 
", ec ); - break; - default: - return EXCEPTION_CONTINUE_SEARCH; - } - - { - GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" ); - - TracyQueuePrepare( QueueType::CrashReport ); - item->crashReport.time = Profiler::GetTime(); - item->crashReport.text = (uint64_t)s_crashText; - TracyQueueCommit( crashReportThread ); - } - - HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); - if( h == INVALID_HANDLE_VALUE ) return EXCEPTION_CONTINUE_SEARCH; - - THREADENTRY32 te = { sizeof( te ) }; - if( !Thread32First( h, &te ) ) - { - CloseHandle( h ); - return EXCEPTION_CONTINUE_SEARCH; - } - - const auto pid = GetCurrentProcessId(); - const auto tid = GetCurrentThreadId(); - - do - { - if( te.th32OwnerProcessID == pid && te.th32ThreadID != tid && te.th32ThreadID != s_profilerThreadId && te.th32ThreadID != s_symbolThreadId ) - { - HANDLE th = OpenThread( THREAD_SUSPEND_RESUME, FALSE, te.th32ThreadID ); - if( th != INVALID_HANDLE_VALUE ) - { - SuspendThread( th ); - CloseHandle( th ); - } - } - } - while( Thread32Next( h, &te ) ); - CloseHandle( h ); - - { - TracyLfqPrepare( QueueType::Crash ); - TracyLfqCommit; - } - - std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); - GetProfiler().RequestShutdown(); - while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; - - return EXCEPTION_CONTINUE_SEARCH; -} -#endif - -static Profiler* s_instance = nullptr; -static Thread* s_thread; -#ifndef TRACY_NO_FRAME_IMAGE -static Thread* s_compressThread; -#endif -#ifdef TRACY_HAS_CALLSTACK -static Thread* s_symbolThread; -std::atomic s_symbolThreadGone { false }; -#endif -#ifdef TRACY_HAS_SYSTEM_TRACING -static Thread* s_sysTraceThread = nullptr; -#endif - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER -# ifndef TRACY_CRASH_SIGNAL -# define TRACY_CRASH_SIGNAL SIGPWR -# endif - -static long s_profilerTid = 0; -static long s_symbolTid = 0; -static char s_crashText[1024]; -static std::atomic 
s_alreadyCrashed( false ); - -static void ThreadFreezer( int /*signal*/ ) -{ - for(;;) sleep( 1000 ); -} - -static inline void HexPrint( char*& ptr, uint64_t val ) -{ - if( val == 0 ) - { - *ptr++ = '0'; - return; - } - - static const char HexTable[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - char buf[16]; - auto bptr = buf; - - do - { - *bptr++ = HexTable[val%16]; - val /= 16; - } - while( val > 0 ); - - do - { - *ptr++ = *--bptr; - } - while( bptr != buf ); -} - -static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) -{ - bool expected = false; - if( !s_alreadyCrashed.compare_exchange_strong( expected, true ) ) ThreadFreezer( signal ); - - struct sigaction act = {}; - act.sa_handler = SIG_DFL; - sigaction( SIGABRT, &act, nullptr ); - - auto msgPtr = s_crashText; - switch( signal ) - { - case SIGILL: - strcpy( msgPtr, "Illegal Instruction.\n" ); - while( *msgPtr ) msgPtr++; - switch( info->si_code ) - { - case ILL_ILLOPC: - strcpy( msgPtr, "Illegal opcode.\n" ); - break; - case ILL_ILLOPN: - strcpy( msgPtr, "Illegal operand.\n" ); - break; - case ILL_ILLADR: - strcpy( msgPtr, "Illegal addressing mode.\n" ); - break; - case ILL_ILLTRP: - strcpy( msgPtr, "Illegal trap.\n" ); - break; - case ILL_PRVOPC: - strcpy( msgPtr, "Privileged opcode.\n" ); - break; - case ILL_PRVREG: - strcpy( msgPtr, "Privileged register.\n" ); - break; - case ILL_COPROC: - strcpy( msgPtr, "Coprocessor error.\n" ); - break; - case ILL_BADSTK: - strcpy( msgPtr, "Internal stack error.\n" ); - break; - default: - break; - } - break; - case SIGFPE: - strcpy( msgPtr, "Floating-point exception.\n" ); - while( *msgPtr ) msgPtr++; - switch( info->si_code ) - { - case FPE_INTDIV: - strcpy( msgPtr, "Integer divide by zero.\n" ); - break; - case FPE_INTOVF: - strcpy( msgPtr, "Integer overflow.\n" ); - break; - case FPE_FLTDIV: - strcpy( msgPtr, "Floating-point divide by zero.\n" ); - break; - case FPE_FLTOVF: - strcpy( msgPtr, 
"Floating-point overflow.\n" ); - break; - case FPE_FLTUND: - strcpy( msgPtr, "Floating-point underflow.\n" ); - break; - case FPE_FLTRES: - strcpy( msgPtr, "Floating-point inexact result.\n" ); - break; - case FPE_FLTINV: - strcpy( msgPtr, "Floating-point invalid operation.\n" ); - break; - case FPE_FLTSUB: - strcpy( msgPtr, "Subscript out of range.\n" ); - break; - default: - break; - } - break; - case SIGSEGV: - strcpy( msgPtr, "Invalid memory reference.\n" ); - while( *msgPtr ) msgPtr++; - switch( info->si_code ) - { - case SEGV_MAPERR: - strcpy( msgPtr, "Address not mapped to object.\n" ); - break; - case SEGV_ACCERR: - strcpy( msgPtr, "Invalid permissions for mapped object.\n" ); - break; -# ifdef SEGV_BNDERR - case SEGV_BNDERR: - strcpy( msgPtr, "Failed address bound checks.\n" ); - break; -# endif -# ifdef SEGV_PKUERR - case SEGV_PKUERR: - strcpy( msgPtr, "Access was denied by memory protection keys.\n" ); - break; -# endif - default: - break; - } - break; - case SIGPIPE: - strcpy( msgPtr, "Broken pipe.\n" ); - while( *msgPtr ) msgPtr++; - break; - case SIGBUS: - strcpy( msgPtr, "Bus error.\n" ); - while( *msgPtr ) msgPtr++; - switch( info->si_code ) - { - case BUS_ADRALN: - strcpy( msgPtr, "Invalid address alignment.\n" ); - break; - case BUS_ADRERR: - strcpy( msgPtr, "Nonexistent physical address.\n" ); - break; - case BUS_OBJERR: - strcpy( msgPtr, "Object-specific hardware error.\n" ); - break; -# ifdef BUS_MCEERR_AR - case BUS_MCEERR_AR: - strcpy( msgPtr, "Hardware memory error consumed on a machine check; action required.\n" ); - break; -# endif -# ifdef BUS_MCEERR_AO - case BUS_MCEERR_AO: - strcpy( msgPtr, "Hardware memory error detected in process but not consumed; action optional.\n" ); - break; -# endif - default: - break; - } - break; - case SIGABRT: - strcpy( msgPtr, "Abort signal from abort().\n" ); - break; - default: - abort(); - } - while( *msgPtr ) msgPtr++; - - if( signal != SIGPIPE ) - { - strcpy( msgPtr, "Fault address: 0x" ); - while( 
*msgPtr ) msgPtr++; - HexPrint( msgPtr, uint64_t( info->si_addr ) ); - *msgPtr++ = '\n'; - } - - { - GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" ); - - TracyQueuePrepare( QueueType::CrashReport ); - item->crashReport.time = Profiler::GetTime(); - item->crashReport.text = (uint64_t)s_crashText; - TracyQueueCommit( crashReportThread ); - } - - DIR* dp = opendir( "/proc/self/task" ); - if( !dp ) abort(); - - const auto selfTid = syscall( SYS_gettid ); - - struct dirent* ep; - while( ( ep = readdir( dp ) ) != nullptr ) - { - if( ep->d_name[0] == '.' ) continue; - int tid = atoi( ep->d_name ); - if( tid != selfTid && tid != s_profilerTid && tid != s_symbolTid ) - { - syscall( SYS_tkill, tid, TRACY_CRASH_SIGNAL ); - } - } - closedir( dp ); - -#ifdef TRACY_HAS_CALLSTACK - if( selfTid == s_symbolTid ) s_symbolThreadGone.store( true, std::memory_order_release ); -#endif - - TracyLfqPrepare( QueueType::Crash ); - TracyLfqCommit; - - std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); - GetProfiler().RequestShutdown(); - while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; - - abort(); -} -#endif - - -enum { QueuePrealloc = 256 * 1024 }; - -TRACY_API int64_t GetFrequencyQpc() -{ -#if defined _WIN32 - LARGE_INTEGER t; - QueryPerformanceFrequency( &t ); - return t.QuadPart; -#else - return 0; -#endif -} - -#ifdef TRACY_DELAYED_INIT -struct ThreadNameData; -TRACY_API moodycamel::ConcurrentQueue& GetQueue(); - -struct ProfilerData -{ - int64_t initTime = SetupHwTimer(); - moodycamel::ConcurrentQueue queue; - Profiler profiler; - std::atomic lockCounter { 0 }; - std::atomic gpuCtxCounter { 0 }; - std::atomic threadNameData { nullptr }; -}; - -struct ProducerWrapper -{ - ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {} - moodycamel::ProducerToken detail; - tracy::moodycamel::ConcurrentQueue::ExplicitProducer* ptr; -}; - -struct 
ProfilerThreadData -{ - ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {} - ProducerWrapper token; - GpuCtxWrapper gpuCtx; -# ifdef TRACY_ON_DEMAND - LuaZoneState luaZoneState; -# endif -}; - -std::atomic RpInitDone { 0 }; -std::atomic RpInitLock { 0 }; -thread_local bool RpThreadInitDone = false; -thread_local bool RpThreadShutdown = false; - -# ifdef TRACY_MANUAL_LIFETIME -ProfilerData* s_profilerData = nullptr; -static ProfilerThreadData& GetProfilerThreadData(); -static std::atomic s_isProfilerStarted { false }; -TRACY_API void StartupProfiler() -{ - s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); - new (s_profilerData) ProfilerData(); - s_profilerData->profiler.SpawnWorkerThreads(); - GetProfilerThreadData().token = ProducerWrapper( *s_profilerData ); - s_isProfilerStarted.store( true, std::memory_order_seq_cst ); -} -static ProfilerData& GetProfilerData() -{ - assert( s_profilerData ); - return *s_profilerData; -} -TRACY_API void ShutdownProfiler() -{ - s_isProfilerStarted.store( false, std::memory_order_seq_cst ); - s_profilerData->~ProfilerData(); - tracy_free( s_profilerData ); - s_profilerData = nullptr; - rpmalloc_finalize(); - RpThreadInitDone = false; - RpInitDone.store( 0, std::memory_order_release ); -} -TRACY_API bool IsProfilerStarted() -{ - return s_isProfilerStarted.load( std::memory_order_seq_cst ); -} -# else -static std::atomic profilerDataLock { 0 }; -static std::atomic profilerData { nullptr }; - -static ProfilerData& GetProfilerData() -{ - auto ptr = profilerData.load( std::memory_order_acquire ); - if( !ptr ) - { - int expected = 0; - while( !profilerDataLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } - ptr = profilerData.load( std::memory_order_acquire ); - if( !ptr ) - { - ptr = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); - new (ptr) ProfilerData(); - profilerData.store( ptr, 
std::memory_order_release ); - } - profilerDataLock.store( 0, std::memory_order_release ); - } - return *ptr; -} -# endif - -// GCC prior to 8.4 had a bug with function-inline thread_local variables. Versions of glibc beginning with -// 2.18 may attempt to work around this issue, which manifests as a crash while running static destructors -// if this function is compiled into a shared object. Unfortunately, centos7 ships with glibc 2.17. If running -// on old GCC, use the old-fashioned way as a workaround -// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85400 -#if !defined(__clang__) && defined(__GNUC__) && ((__GNUC__ < 8) || ((__GNUC__ == 8) && (__GNUC_MINOR__ < 4))) -struct ProfilerThreadDataKey -{ -public: - ProfilerThreadDataKey() - { - int val = pthread_key_create(&m_key, sDestructor); - static_cast(val); // unused - assert(val == 0); - } - ~ProfilerThreadDataKey() - { - int val = pthread_key_delete(m_key); - static_cast(val); // unused - assert(val == 0); - } - ProfilerThreadData& get() - { - void* p = pthread_getspecific(m_key); - if (!p) - { - p = (ProfilerThreadData*)tracy_malloc( sizeof( ProfilerThreadData ) ); - new (p) ProfilerThreadData(GetProfilerData()); - pthread_setspecific(m_key, p); - } - return *static_cast(p); - } -private: - pthread_key_t m_key; - - static void sDestructor(void* p) - { - ((ProfilerThreadData*)p)->~ProfilerThreadData(); - tracy_free(p); - } -}; - -static ProfilerThreadData& GetProfilerThreadData() -{ - static ProfilerThreadDataKey key; - return key.get(); -} -#else -static ProfilerThreadData& GetProfilerThreadData() -{ - thread_local ProfilerThreadData data( GetProfilerData() ); - return data; -} -#endif - -TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; } -TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; } -TRACY_API moodycamel::ConcurrentQueue& GetQueue() { return GetProfilerData().queue; } -TRACY_API int64_t GetInitTime() { return 
GetProfilerData().initTime; } -TRACY_API std::atomic& GetLockCounter() { return GetProfilerData().lockCounter; } -TRACY_API std::atomic& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; } -TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; } -TRACY_API uint32_t GetThreadHandle() { return detail::GetThreadHandleImpl(); } -std::atomic& GetThreadNameData() { return GetProfilerData().threadNameData; } - -# ifdef TRACY_ON_DEMAND -TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; } -# endif - -# ifndef TRACY_MANUAL_LIFETIME -namespace -{ - const auto& __profiler_init = GetProfiler(); -} -# endif - -#else - -// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this. - -// 1a. But s_queue is needed for initialization of variables in point 2. -extern moodycamel::ConcurrentQueue s_queue; - -// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread. -thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue ); -thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) }; -thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() }; - -# ifdef _MSC_VER -// 1. Initialize these static variables before all other variables. 
-# pragma warning( disable : 4075 ) -# pragma init_seg( ".CRT$XCB" ) -# endif - -static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() }; -std::atomic init_order(102) RpInitDone( 0 ); -std::atomic init_order(102) RpInitLock( 0 ); -thread_local bool RpThreadInitDone = false; -thread_local bool RpThreadShutdown = false; -moodycamel::ConcurrentQueue init_order(103) s_queue( QueuePrealloc ); -std::atomic init_order(104) s_lockCounter( 0 ); -std::atomic init_order(104) s_gpuCtxCounter( 0 ); - -thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr }; - -struct ThreadNameData; -static std::atomic init_order(104) s_threadNameDataInstance( nullptr ); -std::atomic& s_threadNameData = s_threadNameDataInstance; - -# ifdef TRACY_ON_DEMAND -thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false }; -# endif - -static Profiler init_order(105) s_profiler; - -TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken() { return s_token.ptr; } -TRACY_API Profiler& GetProfiler() { return s_profiler; } -TRACY_API moodycamel::ConcurrentQueue& GetQueue() { return s_queue; } -TRACY_API int64_t GetInitTime() { return s_initTime.val; } -TRACY_API std::atomic& GetLockCounter() { return s_lockCounter; } -TRACY_API std::atomic& GetGpuCtxCounter() { return s_gpuCtxCounter; } -TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; } -TRACY_API uint32_t GetThreadHandle() { return s_threadHandle.val; } - -std::atomic& GetThreadNameData() { return s_threadNameData; } - -# ifdef TRACY_ON_DEMAND -TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } -# endif -#endif - -TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } -TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } - -constexpr static size_t SafeSendBufferSize = 65536; - -Profiler::Profiler() - : m_timeBegin( 0 ) - , m_mainThread( detail::GetThreadHandleImpl() ) - , m_epoch( std::chrono::duration_cast( 
std::chrono::system_clock::now().time_since_epoch() ).count() ) - , m_shutdown( false ) - , m_shutdownManual( false ) - , m_shutdownFinished( false ) - , m_sock( nullptr ) - , m_broadcast( nullptr ) - , m_noExit( false ) - , m_userPort( 0 ) - , m_zoneId( 1 ) - , m_samplingPeriod( 0 ) - , m_stream( LZ4_createStream() ) - , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) ) - , m_bufferOffset( 0 ) - , m_bufferStart( 0 ) - , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) ) - , m_serialQueue( 1024*1024 ) - , m_serialDequeue( 1024*1024 ) -#ifndef TRACY_NO_FRAME_IMAGE - , m_fiQueue( 16 ) - , m_fiDequeue( 16 ) -#endif - , m_symbolQueue( 8*1024 ) - , m_frameCount( 0 ) - , m_isConnected( false ) -#ifdef TRACY_ON_DEMAND - , m_connectionId( 0 ) - , m_deferredQueue( 64*1024 ) -#endif - , m_paramCallback( nullptr ) - , m_sourceCallback( nullptr ) - , m_queryImage( nullptr ) - , m_queryData( nullptr ) - , m_crashHandlerInstalled( false ) - , m_programName( nullptr ) -{ - assert( !s_instance ); - s_instance = this; - -#ifndef TRACY_DELAYED_INIT -# ifdef _MSC_VER - // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. - s_token_detail = moodycamel::ProducerToken( s_queue ); - s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; - s_threadHandle = ThreadHandleWrapper { m_mainThread }; -# else - //#error FilipNur check if works - // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. 
- s_token_detail = moodycamel::ProducerToken( s_queue ); - s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; - s_threadHandle = ThreadHandleWrapper { m_mainThread }; -# endif -#endif - - CalibrateTimer(); - CalibrateDelay(); - ReportTopology(); - -#ifdef __linux__ - m_kcore = (KCore*)tracy_malloc( sizeof( KCore ) ); - new(m_kcore) KCore(); -#endif - -#ifndef TRACY_NO_EXIT - const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" ); - if( noExitEnv && noExitEnv[0] == '1' ) - { - m_noExit = true; - } -#endif - - const char* userPort = GetEnvVar( "TRACY_PORT" ); - if( userPort ) - { - m_userPort = atoi( userPort ); - } - - m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); - -#if defined _WIN32 && (defined __clang__ || defined __GNUC__) - - m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; - - { // scope for temporary variable originalHandlesCount - int originalHandlesCount = _getmaxstdio(); - - while(_pipe(m_pipe, m_pipeBufSize, _O_BINARY) != 0) - { - if ((errno == EMFILE) || (errno == ENFILE)) - { - // safe upper bound for exceptional situations - if(_getmaxstdio() > (originalHandlesCount + 10)) - { - throw std::runtime_error("Failed to create communication pipe!"); - } - - // as described by Raymond Chen (https://devblogs.microsoft.com/oldnewthing/20070718-00/?p=25963) - // max number of handles in windows is 10000, - // _getmaxstdio() at the start returns 512, so no fear of too much handles - _setmaxstdio(_getmaxstdio() + 1); - } - else - { - m_pipeBufSize /= 2; - } - } - } - -#elif !defined _WIN32 - pipe(m_pipe); -# if defined __APPLE__ || defined BSD - // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default - m_pipeBufSize = 16384; -# else - m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; - while( fcntl( m_pipe[0], F_SETPIPE_SZ, m_pipeBufSize ) < 0 && errno == EPERM ) m_pipeBufSize /= 2; // too big; reduce - m_pipeBufSize = fcntl( m_pipe[0], F_GETPIPE_SZ ); -# endif - fcntl( m_pipe[1], F_SETFL, O_NONBLOCK ); -#endif - 
-#if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) - SpawnWorkerThreads(); -#endif -} - -void Profiler::InstallCrashHandler() -{ - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - struct sigaction threadFreezer = {}; - threadFreezer.sa_handler = ThreadFreezer; - sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); - - struct sigaction crashHandler = {}; - crashHandler.sa_sigaction = CrashHandler; - crashHandler.sa_flags = SA_SIGINFO; - sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); - sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); - sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); - sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); - sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); - sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); -#endif - -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER - // We cannot use Vectored Exception handling because it catches application-wide frame-based SEH blocks. We only - // want to catch unhandled exceptions. 
- m_prevHandler = (void *)SetUnhandledExceptionFilter( CrashFilter ); -#endif - -#ifndef TRACY_NO_CRASH_HANDLER - m_crashHandlerInstalled = true; -#endif - -} - -void Profiler::RemoveCrashHandler() -{ -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER - if( m_crashHandlerInstalled ) - { - auto prev = SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER)m_prevHandler ); - if( prev != CrashFilter ) SetUnhandledExceptionFilter( prev ); // A different exception filter was installed over ours => put it back - } -#endif - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - if( m_crashHandlerInstalled ) - { - auto restore = []( int signum, struct sigaction* prev ) { - struct sigaction old; - sigaction( signum, prev, &old ); - if( old.sa_sigaction != CrashHandler ) sigaction( signum, &old, nullptr ); // A different signal handler was installed over ours => put it back - }; - restore( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr ); - restore( SIGILL, &m_prevSignal.ill ); - restore( SIGFPE, &m_prevSignal.fpe ); - restore( SIGSEGV, &m_prevSignal.segv ); - restore( SIGPIPE, &m_prevSignal.pipe ); - restore( SIGBUS, &m_prevSignal.bus ); - restore( SIGABRT, &m_prevSignal.abrt ); - } -#endif - m_crashHandlerInstalled = false; -} - -void Profiler::SpawnWorkerThreads() -{ -#ifdef TRACY_HAS_SYSTEM_TRACING - // use TRACY_NO_SYS_TRACE=1 to force disabling sys tracing (even if available in the underlying system) - // as it can have significant impact on the size of the traces - const char* noSysTrace = GetEnvVar( "TRACY_NO_SYS_TRACE" ); - const bool disableSystrace = (noSysTrace && noSysTrace[0] == '1'); - if( disableSystrace ) - { - TracyDebug("TRACY: Sys Trace was disabled by 'TRACY_NO_SYS_TRACE=1'\n"); - } - else if( SysTraceStart( m_samplingPeriod ) ) - { - s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); - std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); - } 
-#endif - - s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_thread) Thread( LaunchWorker, this ); - -#ifndef TRACY_NO_FRAME_IMAGE - s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_compressThread) Thread( LaunchCompressWorker, this ); -#endif - -#ifdef TRACY_HAS_CALLSTACK - s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_symbolThread) Thread( LaunchSymbolWorker, this ); -#endif - -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER - s_profilerThreadId = GetThreadId( s_thread->Handle() ); -# ifdef TRACY_HAS_CALLSTACK - s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); -# endif -#endif - -#ifdef TRACY_HAS_CALLSTACK - InitCallstackCritical(); -#endif - - m_timeBegin.store( GetTime(), std::memory_order_relaxed ); -} - -Profiler::~Profiler() -{ - m_shutdown.store( true, std::memory_order_relaxed ); - - RemoveCrashHandler(); - -#ifdef TRACY_HAS_SYSTEM_TRACING - if( s_sysTraceThread ) - { - SysTraceStop(); - s_sysTraceThread->~Thread(); - tracy_free( s_sysTraceThread ); - } -#endif - -#ifdef TRACY_HAS_CALLSTACK - s_symbolThread->~Thread(); - tracy_free( s_symbolThread ); -#endif - -#ifndef TRACY_NO_FRAME_IMAGE - s_compressThread->~Thread(); - tracy_free( s_compressThread ); -#endif - - s_thread->~Thread(); - tracy_free( s_thread ); - -#ifdef TRACY_HAS_CALLSTACK - EndCallstack(); -#endif - -#ifdef __linux__ - m_kcore->~KCore(); - tracy_free( m_kcore ); -#endif - -#ifndef _WIN32 - close( m_pipe[0] ); - close( m_pipe[1] ); -#elif defined __clang__ || defined __GNUC__ - // _WIN32 - _close(m_pipe[0]); - _close(m_pipe[1]); -#endif - tracy_free( m_safeSendBuffer ); - - tracy_free( m_lz4Buf ); - tracy_free( m_buffer ); - LZ4_freeStream( (LZ4_stream_t*)m_stream ); - - if( m_sock ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - } - - if( m_broadcast ) - { - m_broadcast->~UdpBroadcast(); - tracy_free( m_broadcast ); - } - - assert( s_instance ); - s_instance = nullptr; -} - -bool 
Profiler::ShouldExit() -{ - return s_instance->m_shutdown.load( std::memory_order_relaxed ); -} - -void Profiler::Worker() -{ -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - s_profilerTid = syscall( SYS_gettid ); -#endif - - ThreadExitHandler threadExitHandler; - - SetThreadName( "Tracy Profiler" ); - -#ifdef TRACY_DATA_PORT - const bool dataPortSearch = false; - auto dataPort = m_userPort != 0 ? m_userPort : TRACY_DATA_PORT; -#else - const bool dataPortSearch = m_userPort == 0; - auto dataPort = m_userPort != 0 ? m_userPort : 8086; -#endif -#ifdef TRACY_BROADCAST_PORT - const auto broadcastPort = TRACY_BROADCAST_PORT; -#else - const auto broadcastPort = 8086; -#endif - - while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - -#ifdef TRACY_USE_RPMALLOC - rpmalloc_thread_initialize(); -#endif - - m_exectime = 0; - const auto execname = GetProcessExecutablePath(); - if( execname ) - { - struct stat st; - if( stat( execname, &st ) == 0 ) - { - m_exectime = (uint64_t)st.st_mtime; - } - } - - const auto procname = GetProcessName(); - const auto pnsz = std::min( strlen( procname ), WelcomeMessageProgramNameSize - 1 ); - - const auto hostinfo = GetHostInfo(); - const auto hisz = std::min( strlen( hostinfo ), WelcomeMessageHostInfoSize - 1 ); - - const uint64_t pid = GetPid(); - - uint8_t flags = 0; - -#ifdef TRACY_ON_DEMAND - flags |= WelcomeFlag::OnDemand; -#endif -#ifdef __APPLE__ - flags |= WelcomeFlag::IsApple; -#endif -#ifndef TRACY_NO_CODE_TRANSFER - flags |= WelcomeFlag::CodeTransfer; -#endif -#ifdef _WIN32 - flags |= WelcomeFlag::CombineSamples; -# ifndef TRACY_NO_CONTEXT_SWITCH - flags |= WelcomeFlag::IdentifySamples; -# endif -#endif - -#if defined __i386 || defined _M_IX86 - uint8_t cpuArch = CpuArchX86; -#elif defined __x86_64__ || defined _M_X64 - uint8_t cpuArch = CpuArchX64; -#elif defined __aarch64__ - uint8_t cpuArch = CpuArchArm64; -#elif defined __ARM_ARCH - uint8_t 
cpuArch = CpuArchArm32; -#else - uint8_t cpuArch = CpuArchUnknown; -#endif - -#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 - uint32_t regs[4]; - char manufacturer[12]; - CpuId( regs, 0 ); - memcpy( manufacturer, regs+1, 4 ); - memcpy( manufacturer+4, regs+3, 4 ); - memcpy( manufacturer+8, regs+2, 4 ); - - CpuId( regs, 1 ); - uint32_t cpuId = ( regs[0] & 0xFFF ) | ( ( regs[0] & 0xFFF0000 ) >> 4 ); -#else - const char manufacturer[12] = {}; - uint32_t cpuId = 0; -#endif - - WelcomeMessage welcome; - MemWrite( &welcome.timerMul, m_timerMul ); - MemWrite( &welcome.initBegin, GetInitTime() ); - MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) ); - MemWrite( &welcome.delay, m_delay ); - MemWrite( &welcome.resolution, m_resolution ); - MemWrite( &welcome.epoch, m_epoch ); - MemWrite( &welcome.exectime, m_exectime ); - MemWrite( &welcome.pid, pid ); - MemWrite( &welcome.samplingPeriod, m_samplingPeriod ); - MemWrite( &welcome.flags, flags ); - MemWrite( &welcome.cpuArch, cpuArch ); - memcpy( welcome.cpuManufacturer, manufacturer, 12 ); - MemWrite( &welcome.cpuId, cpuId ); - memcpy( welcome.programName, procname, pnsz ); - memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); - memcpy( welcome.hostInfo, hostinfo, hisz ); - memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz ); - - moodycamel::ConsumerToken token( GetQueue() ); - - ListenSocket listen; - bool isListening = false; - if( !dataPortSearch ) - { - isListening = listen.Listen( dataPort, 4 ); - } - else - { - for( uint32_t i=0; i<20; i++ ) - { - if( listen.Listen( dataPort+i, 4 ) ) - { - dataPort += i; - isListening = true; - break; - } - } - } - if( !isListening ) - { - for(;;) - { - if( ShouldExit() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - - ClearQueues( token ); - std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - } - } - -#ifndef TRACY_NO_BROADCAST - 
m_broadcast = (UdpBroadcast*)tracy_malloc( sizeof( UdpBroadcast ) ); - new(m_broadcast) UdpBroadcast(); -# ifdef TRACY_ONLY_LOCALHOST - const char* addr = "127.255.255.255"; -# elif defined TRACY_CLIENT_ADDRESS - const char* addr = TRACY_CLIENT_ADDRESS; -# elif defined __QNX__ - // global broadcast address of 255.255.255.255 is not well-supported by QNX, - // use the interface broadcast address instead, e.g. "const char* addr = 192.168.1.255;" -# error Need to specify TRACY_CLIENT_ADDRESS for a QNX target. -# else - const char* addr = "255.255.255.255"; -# endif - if( !m_broadcast->Open( addr, broadcastPort ) ) - { - m_broadcast->~UdpBroadcast(); - tracy_free( m_broadcast ); - m_broadcast = nullptr; - } -#endif - - int broadcastLen = 0; - auto& broadcastMsg = GetBroadcastMessage( procname, pnsz, broadcastLen, dataPort ); - uint64_t lastBroadcast = 0; - - // Connections loop. - // Each iteration of the loop handles whole connection. Multiple iterations will only - // happen in the on-demand mode or when handshake fails. 
- for(;;) - { - // Wait for incoming connection - for(;;) - { -#ifndef TRACY_NO_EXIT - if( !m_noExit && ShouldExit() ) - { - if( m_broadcast ) - { - broadcastMsg.activeTime = -1; - m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); - } - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } -#endif - m_sock = listen.Accept(); - if( m_sock ) break; -#ifndef TRACY_ON_DEMAND - ProcessSysTime(); -# ifdef TRACY_HAS_SYSPOWER - m_sysPower.Tick(); -# endif -#endif - - if( m_broadcast ) - { - const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); - if( t - lastBroadcast > 3000000000 ) // 3s - { - m_programNameLock.lock(); - if( m_programName ) - { - broadcastMsg = GetBroadcastMessage( m_programName, strlen( m_programName ), broadcastLen, dataPort ); - m_programName = nullptr; - } - m_programNameLock.unlock(); - - lastBroadcast = t; - const auto ts = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch() ).count(); - broadcastMsg.activeTime = int32_t( ts - m_epoch ); - assert( broadcastMsg.activeTime >= 0 ); - m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); - } - } - } - - if( m_broadcast ) - { - lastBroadcast = 0; - broadcastMsg.activeTime = -1; - m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); - } - - // Handshake - { - char shibboleth[HandshakeShibbolethSize]; - auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 2000 ); - if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - - uint32_t protocolVersion; - res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 2000 ); - if( !res ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - - if( protocolVersion != ProtocolVersion ) - { - HandshakeStatus status = HandshakeProtocolMismatch; - m_sock->Send( &status, sizeof( status ) ); 
- m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - } - -#ifdef TRACY_ON_DEMAND - const auto currentTime = GetTime(); - ClearQueues( token ); - m_connectionId.fetch_add( 1, std::memory_order_release ); -#endif - m_isConnected.store( true, std::memory_order_release ); - InstallCrashHandler(); - - HandshakeStatus handshake = HandshakeWelcome; - m_sock->Send( &handshake, sizeof( handshake ) ); - - LZ4_resetStream( (LZ4_stream_t*)m_stream ); - m_sock->Send( &welcome, sizeof( welcome ) ); - - m_threadCtx = 0; - m_refTimeSerial = 0; - m_refTimeCtx = 0; - m_refTimeGpu = 0; - -#ifdef TRACY_ON_DEMAND - OnDemandPayloadMessage onDemand; - onDemand.frames = m_frameCount.load( std::memory_order_relaxed ); - onDemand.currentTime = currentTime; - - m_sock->Send( &onDemand, sizeof( onDemand ) ); - - m_deferredLock.lock(); - for( auto& item : m_deferredQueue ) - { - uint64_t ptr; - uint16_t size; - const auto idx = MemRead( &item.hdr.idx ); - switch( (QueueType)idx ) - { - case QueueType::MessageAppInfo: - ptr = MemRead( &item.messageFat.text ); - size = MemRead( &item.messageFat.size ); - SendSingleString( (const char*)ptr, size ); - break; - case QueueType::LockName: - ptr = MemRead( &item.lockNameFat.name ); - size = MemRead( &item.lockNameFat.size ); - SendSingleString( (const char*)ptr, size ); - break; - case QueueType::GpuContextName: - ptr = MemRead( &item.gpuContextNameFat.ptr ); - size = MemRead( &item.gpuContextNameFat.size ); - SendSingleString( (const char*)ptr, size ); - break; - default: - break; - } - AppendData( &item, QueueDataSize[idx] ); - } - m_deferredLock.unlock(); -#endif - - // Main communications loop - int keepAlive = 0; - for(;;) - { - ProcessSysTime(); -#ifdef TRACY_HAS_SYSPOWER - m_sysPower.Tick(); -#endif - const auto status = Dequeue( token ); - const auto serialStatus = DequeueSerial(); - if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) - { - break; - } - else if( status == 
DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) - { - if( ShouldExit() ) break; - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) break; - } - if( keepAlive == 500 ) - { - QueueItem ka; - ka.hdr.type = QueueType::KeepAlive; - AppendData( &ka, QueueDataSize[ka.hdr.idx] ); - if( !CommitData() ) break; - - keepAlive = 0; - } - else if( !m_sock->HasData() ) - { - keepAlive++; - std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - } - } - else - { - keepAlive = 0; - } - - bool connActive = true; - while( m_sock->HasData() ) - { - connActive = HandleServerQuery(); - if( !connActive ) break; - } - if( !connActive ) break; - } - if( ShouldExit() ) break; - - m_isConnected.store( false, std::memory_order_release ); - RemoveCrashHandler(); - -#ifdef TRACY_ON_DEMAND - m_bufferOffset = 0; - m_bufferStart = 0; -#endif - - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - -#ifndef TRACY_ON_DEMAND - // Client is no longer available here. Accept incoming connections, but reject handshake. - for(;;) - { - if( ShouldExit() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - - ClearQueues( token ); - - m_sock = listen.Accept(); - if( m_sock ) - { - char shibboleth[HandshakeShibbolethSize]; - auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 1000 ); - if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - - uint32_t protocolVersion; - res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 1000 ); - if( !res ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - - HandshakeStatus status = HandshakeNotAvailable; - m_sock->Send( &status, sizeof( status ) ); - m_sock->~Socket(); - tracy_free( m_sock ); - } - } -#endif - } - // End of connections loop - - // Wait for symbols thread to terminate. 
Symbol resolution will continue in this thread. -#ifdef TRACY_HAS_CALLSTACK - while( s_symbolThreadGone.load() == false ) { YieldThread(); } -#endif - - // Client is exiting. Send items remaining in queues. - for(;;) - { - const auto status = Dequeue( token ); - const auto serialStatus = DequeueSerial(); - if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) - { - if( m_bufferOffset != m_bufferStart ) CommitData(); - break; - } - - while( m_sock->HasData() ) - { - if( !HandleServerQuery() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - } - -#ifdef TRACY_HAS_CALLSTACK - for(;;) - { - auto si = m_symbolQueue.front(); - if( !si ) break; - HandleSymbolQueueItem( *si ); - m_symbolQueue.pop(); - } -#endif - } - - // Send client termination notice to the server - QueueItem terminate; - MemWrite( &terminate.hdr.type, QueueType::Terminate ); - if( !SendData( (const char*)&terminate, 1 ) ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - // Handle remaining server queries - for(;;) - { - while( m_sock->HasData() ) - { - if( !HandleServerQuery() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - } -#ifdef TRACY_HAS_CALLSTACK - for(;;) - { - auto si = m_symbolQueue.front(); - if( !si ) break; - HandleSymbolQueueItem( *si ); - m_symbolQueue.pop(); - } -#endif - const auto status = Dequeue( token ); - const auto serialStatus = DequeueSerial(); - if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } 
- } - } -} - -#ifndef TRACY_NO_FRAME_IMAGE -void Profiler::CompressWorker() -{ - ThreadExitHandler threadExitHandler; - SetThreadName( "Tracy DXT1" ); - while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - -#ifdef TRACY_USE_RPMALLOC - rpmalloc_thread_initialize(); -#endif - - for(;;) - { - const auto shouldExit = ShouldExit(); - - { - bool lockHeld = true; - while( !m_fiLock.try_lock() ) - { - if( m_shutdownManual.load( std::memory_order_relaxed ) ) - { - lockHeld = false; - break; - } - } - if( !m_fiQueue.empty() ) m_fiQueue.swap( m_fiDequeue ); - if( lockHeld ) - { - m_fiLock.unlock(); - } - } - - const auto sz = m_fiDequeue.size(); - if( sz > 0 ) - { - auto fi = m_fiDequeue.data(); - auto end = fi + sz; - while( fi != end ) - { - const auto w = fi->w; - const auto h = fi->h; - const auto csz = size_t( w * h / 2 ); - auto etc1buf = (char*)tracy_malloc( csz ); - CompressImageDxt1( (const char*)fi->image, etc1buf, w, h ); - tracy_free( fi->image ); - - TracyLfqPrepare( QueueType::FrameImage ); - MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf ); - MemWrite( &item->frameImageFat.frame, fi->frame ); - MemWrite( &item->frameImageFat.w, w ); - MemWrite( &item->frameImageFat.h, h ); - uint8_t flip = fi->flip; - MemWrite( &item->frameImageFat.flip, flip ); - TracyLfqCommit; - - fi++; - } - m_fiDequeue.clear(); - } - else - { - std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); - } - - if( shouldExit ) - { - return; - } - } -} -#endif - -static void FreeAssociatedMemory( const QueueItem& item ) -{ - if( item.hdr.idx >= (int)QueueType::Terminate ) return; - - uint64_t ptr; - switch( item.hdr.type ) - { - case QueueType::ZoneText: - case QueueType::ZoneName: - ptr = MemRead( &item.zoneTextFat.text ); - tracy_free( (void*)ptr ); - break; - case QueueType::MessageColor: - case QueueType::MessageColorCallstack: - ptr = MemRead( &item.messageColorFat.text ); - tracy_free( (void*)ptr ); 
- break; - case QueueType::Message: - case QueueType::MessageCallstack: -#ifndef TRACY_ON_DEMAND - case QueueType::MessageAppInfo: -#endif - ptr = MemRead( &item.messageFat.text ); - tracy_free( (void*)ptr ); - break; - case QueueType::ZoneBeginAllocSrcLoc: - case QueueType::ZoneBeginAllocSrcLocCallstack: - ptr = MemRead( &item.zoneBegin.srcloc ); - tracy_free( (void*)ptr ); - break; - case QueueType::GpuZoneBeginAllocSrcLoc: - case QueueType::GpuZoneBeginAllocSrcLocCallstack: - case QueueType::GpuZoneBeginAllocSrcLocSerial: - case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: - ptr = MemRead( &item.gpuZoneBegin.srcloc ); - tracy_free( (void*)ptr ); - break; - case QueueType::CallstackSerial: - case QueueType::Callstack: - ptr = MemRead( &item.callstackFat.ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::CallstackAlloc: - ptr = MemRead( &item.callstackAllocFat.nativePtr ); - tracy_free( (void*)ptr ); - ptr = MemRead( &item.callstackAllocFat.ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::CallstackSample: - case QueueType::CallstackSampleContextSwitch: - ptr = MemRead( &item.callstackSampleFat.ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::FrameImage: - ptr = MemRead( &item.frameImageFat.image ); - tracy_free( (void*)ptr ); - break; -#ifdef TRACY_HAS_CALLSTACK - case QueueType::CallstackFrameSize: - { - InitRpmalloc(); - auto size = MemRead( &item.callstackFrameSizeFat.size ); - auto data = (const CallstackEntry*)MemRead( &item.callstackFrameSizeFat.data ); - for( uint8_t i=0; i( &item.symbolInformationFat.needFree ); - if( needFree ) - { - ptr = MemRead( &item.symbolInformationFat.fileString ); - tracy_free( (void*)ptr ); - } - break; - } - case QueueType::SymbolCodeMetadata: - ptr = MemRead( &item.symbolCodeMetadata.ptr ); - tracy_free( (void*)ptr ); - break; -#endif -#ifndef TRACY_ON_DEMAND - case QueueType::LockName: - ptr = MemRead( &item.lockNameFat.name ); - tracy_free( (void*)ptr ); - break; - case 
QueueType::GpuContextName: - ptr = MemRead( &item.gpuContextNameFat.ptr ); - tracy_free( (void*)ptr ); - break; -#endif -#ifdef TRACY_ON_DEMAND - case QueueType::MessageAppInfo: - case QueueType::GpuContextName: - // Don't free memory associated with deferred messages. - break; -#endif -#ifdef TRACY_HAS_SYSTEM_TRACING - case QueueType::ExternalNameMetadata: - ptr = MemRead( &item.externalNameMetadata.name ); - tracy_free( (void*)ptr ); - ptr = MemRead( &item.externalNameMetadata.threadName ); - tracy_free_fast( (void*)ptr ); - break; -#endif - case QueueType::SourceCodeMetadata: - ptr = MemRead( &item.sourceCodeMetadata.ptr ); - tracy_free( (void*)ptr ); - break; - default: - break; - } -} - -void Profiler::ClearQueues( moodycamel::ConsumerToken& token ) -{ - for(;;) - { - const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, []( QueueItem* item, size_t sz ) { assert( sz > 0 ); while( sz-- > 0 ) FreeAssociatedMemory( *item++ ); } ); - if( sz == 0 ) break; - } - - ClearSerial(); -} - -void Profiler::ClearSerial() -{ - bool lockHeld = true; - while( !m_serialLock.try_lock() ) - { - if( m_shutdownManual.load( std::memory_order_relaxed ) ) - { - lockHeld = false; - break; - } - } - for( auto& v : m_serialQueue ) FreeAssociatedMemory( v ); - m_serialQueue.clear(); - if( lockHeld ) - { - m_serialLock.unlock(); - } - - for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v ); - m_serialDequeue.clear(); -} - -Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) -{ - bool connectionLost = false; - const auto sz = GetQueue().try_dequeue_bulk_single( token, - [this, &connectionLost] ( const uint32_t& threadId ) - { - if( ThreadCtxCheck( threadId ) == ThreadCtxStatus::ConnectionLost ) connectionLost = true; - }, - [this, &connectionLost] ( QueueItem* item, size_t sz ) - { - if( connectionLost ) return; - InitRpmalloc(); - assert( sz > 0 ); - int64_t refThread = m_refTimeThread; - int64_t refCtx = m_refTimeCtx; - int64_t 
refGpu = m_refTimeGpu; - while( sz-- > 0 ) - { - uint64_t ptr; - uint16_t size; - auto idx = MemRead( &item->hdr.idx ); - if( idx < (int)QueueType::Terminate ) - { - switch( (QueueType)idx ) - { - case QueueType::ZoneText: - case QueueType::ZoneName: - ptr = MemRead( &item->zoneTextFat.text ); - size = MemRead( &item->zoneTextFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::Message: - case QueueType::MessageCallstack: - ptr = MemRead( &item->messageFat.text ); - size = MemRead( &item->messageFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::MessageColor: - case QueueType::MessageColorCallstack: - ptr = MemRead( &item->messageColorFat.text ); - size = MemRead( &item->messageColorFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::MessageAppInfo: - ptr = MemRead( &item->messageFat.text ); - size = MemRead( &item->messageFat.size ); - SendSingleString( (const char*)ptr, size ); -#ifndef TRACY_ON_DEMAND - tracy_free_fast( (void*)ptr ); -#endif - break; - case QueueType::ZoneBeginAllocSrcLoc: - case QueueType::ZoneBeginAllocSrcLocCallstack: - { - int64_t t = MemRead( &item->zoneBegin.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneBegin.time, dt ); - ptr = MemRead( &item->zoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::Callstack: - ptr = MemRead( &item->callstackFat.ptr ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::CallstackAlloc: - ptr = MemRead( &item->callstackAllocFat.nativePtr ); - if( ptr != 0 ) - { - CutCallstack( (void*)ptr, "lua_pcall" ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - } - ptr = MemRead( &item->callstackAllocFat.ptr ); - SendCallstackAlloc( ptr ); - tracy_free_fast( (void*)ptr ); 
- break; - case QueueType::CallstackSample: - case QueueType::CallstackSampleContextSwitch: - { - ptr = MemRead( &item->callstackSampleFat.ptr ); - SendCallstackPayload64( ptr ); - tracy_free_fast( (void*)ptr ); - int64_t t = MemRead( &item->callstackSampleFat.time ); - int64_t dt = t - refCtx; - refCtx = t; - MemWrite( &item->callstackSampleFat.time, dt ); - break; - } - case QueueType::FrameImage: - { - ptr = MemRead( &item->frameImageFat.image ); - const auto w = MemRead( &item->frameImageFat.w ); - const auto h = MemRead( &item->frameImageFat.h ); - const auto csz = size_t( w * h / 2 ); - SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::ZoneBegin: - case QueueType::ZoneBeginCallstack: - { - int64_t t = MemRead( &item->zoneBegin.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneBegin.time, dt ); - break; - } - case QueueType::ZoneEnd: - { - int64_t t = MemRead( &item->zoneEnd.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneEnd.time, dt ); - break; - } - case QueueType::GpuZoneBegin: - case QueueType::GpuZoneBeginCallstack: - { - int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->gpuZoneBegin.cpuTime, dt ); - break; - } - case QueueType::GpuZoneBeginAllocSrcLoc: - case QueueType::GpuZoneBeginAllocSrcLocCallstack: - { - int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->gpuZoneBegin.cpuTime, dt ); - ptr = MemRead( &item->gpuZoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::GpuZoneEnd: - { - int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->gpuZoneEnd.cpuTime, dt ); - break; - } - case QueueType::GpuContextName: - ptr = MemRead( &item->gpuContextNameFat.ptr 
); - size = MemRead( &item->gpuContextNameFat.size ); - SendSingleString( (const char*)ptr, size ); -#ifndef TRACY_ON_DEMAND - tracy_free_fast( (void*)ptr ); -#endif - break; - case QueueType::PlotDataInt: - case QueueType::PlotDataFloat: - case QueueType::PlotDataDouble: - { - int64_t t = MemRead( &item->plotDataInt.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->plotDataInt.time, dt ); - break; - } - case QueueType::ContextSwitch: - { - int64_t t = MemRead( &item->contextSwitch.time ); - int64_t dt = t - refCtx; - refCtx = t; - MemWrite( &item->contextSwitch.time, dt ); - break; - } - case QueueType::ThreadWakeup: - { - int64_t t = MemRead( &item->threadWakeup.time ); - int64_t dt = t - refCtx; - refCtx = t; - MemWrite( &item->threadWakeup.time, dt ); - break; - } - case QueueType::GpuTime: - { - int64_t t = MemRead( &item->gpuTime.gpuTime ); - int64_t dt = t - refGpu; - refGpu = t; - MemWrite( &item->gpuTime.gpuTime, dt ); - break; - } -#ifdef TRACY_HAS_CALLSTACK - case QueueType::CallstackFrameSize: - { - auto data = (const CallstackEntry*)MemRead( &item->callstackFrameSizeFat.data ); - auto datasz = MemRead( &item->callstackFrameSizeFat.size ); - auto imageName = (const char*)MemRead( &item->callstackFrameSizeFat.imageName ); - SendSingleString( imageName ); - AppendData( item++, QueueDataSize[idx] ); - - for( uint8_t i=0; i( &item->symbolInformationFat.fileString ); - auto needFree = MemRead( &item->symbolInformationFat.needFree ); - SendSingleString( fileString ); - if( needFree ) tracy_free_fast( (void*)fileString ); - break; - } - case QueueType::SymbolCodeMetadata: - { - auto symbol = MemRead( &item->symbolCodeMetadata.symbol ); - auto ptr = (const char*)MemRead( &item->symbolCodeMetadata.ptr ); - auto size = MemRead( &item->symbolCodeMetadata.size ); - SendLongString( symbol, ptr, size, QueueType::SymbolCode ); - tracy_free_fast( (void*)ptr ); - ++item; - continue; - } -#endif -#ifdef TRACY_HAS_SYSTEM_TRACING - case 
QueueType::ExternalNameMetadata: - { - auto thread = MemRead( &item->externalNameMetadata.thread ); - auto name = (const char*)MemRead( &item->externalNameMetadata.name ); - auto threadName = (const char*)MemRead( &item->externalNameMetadata.threadName ); - SendString( thread, threadName, QueueType::ExternalThreadName ); - SendString( thread, name, QueueType::ExternalName ); - tracy_free_fast( (void*)threadName ); - tracy_free_fast( (void*)name ); - ++item; - continue; - } -#endif - case QueueType::SourceCodeMetadata: - { - auto ptr = (const char*)MemRead( &item->sourceCodeMetadata.ptr ); - auto size = MemRead( &item->sourceCodeMetadata.size ); - auto id = MemRead( &item->sourceCodeMetadata.id ); - SendLongString( (uint64_t)id, ptr, size, QueueType::SourceCode ); - tracy_free_fast( (void*)ptr ); - ++item; - continue; - } - default: - assert( false ); - break; - } - } - if( !AppendData( item++, QueueDataSize[idx] ) ) - { - connectionLost = true; - m_refTimeThread = refThread; - m_refTimeCtx = refCtx; - m_refTimeGpu = refGpu; - return; - } - } - m_refTimeThread = refThread; - m_refTimeCtx = refCtx; - m_refTimeGpu = refGpu; - } - ); - if( connectionLost ) return DequeueStatus::ConnectionLost; - return sz > 0 ? 
DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; -} - -Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop ) -{ - const auto sz = GetQueue().try_dequeue_bulk_single( token, [] ( const uint64_t& ) {}, - [this, &timeStop] ( QueueItem* item, size_t sz ) - { - assert( sz > 0 ); - int64_t refCtx = m_refTimeCtx; - while( sz-- > 0 ) - { - FreeAssociatedMemory( *item ); - if( timeStop < 0 ) return; - const auto idx = MemRead( &item->hdr.idx ); - if( idx == (uint8_t)QueueType::ContextSwitch ) - { - const auto csTime = MemRead( &item->contextSwitch.time ); - if( csTime > timeStop ) - { - timeStop = -1; - m_refTimeCtx = refCtx; - return; - } - int64_t dt = csTime - refCtx; - refCtx = csTime; - MemWrite( &item->contextSwitch.time, dt ); - if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) ) - { - timeStop = -2; - m_refTimeCtx = refCtx; - return; - } - } - else if( idx == (uint8_t)QueueType::ThreadWakeup ) - { - const auto csTime = MemRead( &item->threadWakeup.time ); - if( csTime > timeStop ) - { - timeStop = -1; - m_refTimeCtx = refCtx; - return; - } - int64_t dt = csTime - refCtx; - refCtx = csTime; - MemWrite( &item->threadWakeup.time, dt ); - if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) ) - { - timeStop = -2; - m_refTimeCtx = refCtx; - return; - } - } - item++; - } - m_refTimeCtx = refCtx; - } - ); - - if( timeStop == -2 ) return DequeueStatus::ConnectionLost; - return ( timeStop == -1 || sz > 0 ) ? 
DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; -} - -#define ThreadCtxCheckSerial( _name ) \ - uint32_t thread = MemRead( &item->_name.thread ); \ - switch( ThreadCtxCheck( thread ) ) \ - { \ - case ThreadCtxStatus::Same: break; \ - case ThreadCtxStatus::Changed: assert( m_refTimeThread == 0 ); refThread = 0; break; \ - case ThreadCtxStatus::ConnectionLost: return DequeueStatus::ConnectionLost; \ - default: assert( false ); break; \ - } - -Profiler::DequeueStatus Profiler::DequeueSerial() -{ - { - bool lockHeld = true; - while( !m_serialLock.try_lock() ) - { - if( m_shutdownManual.load( std::memory_order_relaxed ) ) - { - lockHeld = false; - break; - } - } - if( !m_serialQueue.empty() ) m_serialQueue.swap( m_serialDequeue ); - if( lockHeld ) - { - m_serialLock.unlock(); - } - } - - const auto sz = m_serialDequeue.size(); - if( sz > 0 ) - { - InitRpmalloc(); - int64_t refSerial = m_refTimeSerial; - int64_t refGpu = m_refTimeGpu; -#ifdef TRACY_FIBERS - int64_t refThread = m_refTimeThread; -#endif - auto item = m_serialDequeue.data(); - auto end = item + sz; - while( item != end ) - { - uint64_t ptr; - auto idx = MemRead( &item->hdr.idx ); - if( idx < (int)QueueType::Terminate ) - { - switch( (QueueType)idx ) - { - case QueueType::CallstackSerial: - ptr = MemRead( &item->callstackFat.ptr ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::LockWait: - case QueueType::LockSharedWait: - { - int64_t t = MemRead( &item->lockWait.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->lockWait.time, dt ); - break; - } - case QueueType::LockObtain: - case QueueType::LockSharedObtain: - { - int64_t t = MemRead( &item->lockObtain.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->lockObtain.time, dt ); - break; - } - case QueueType::LockRelease: - case QueueType::LockSharedRelease: - { - int64_t t = MemRead( &item->lockRelease.time ); - int64_t dt = t - refSerial; - refSerial = t; - 
MemWrite( &item->lockRelease.time, dt ); - break; - } - case QueueType::LockName: - { - ptr = MemRead( &item->lockNameFat.name ); - uint16_t size = MemRead( &item->lockNameFat.size ); - SendSingleString( (const char*)ptr, size ); -#ifndef TRACY_ON_DEMAND - tracy_free_fast( (void*)ptr ); -#endif - break; - } - case QueueType::MemAlloc: - case QueueType::MemAllocNamed: - case QueueType::MemAllocCallstack: - case QueueType::MemAllocCallstackNamed: - { - int64_t t = MemRead( &item->memAlloc.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->memAlloc.time, dt ); - break; - } - case QueueType::MemFree: - case QueueType::MemFreeNamed: - case QueueType::MemFreeCallstack: - case QueueType::MemFreeCallstackNamed: - { - int64_t t = MemRead( &item->memFree.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->memFree.time, dt ); - break; - } - case QueueType::MemDiscard: - case QueueType::MemDiscardCallstack: - { - int64_t t = MemRead( &item->memDiscard.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->memDiscard.time, dt ); - break; - } - case QueueType::GpuZoneBeginSerial: - case QueueType::GpuZoneBeginCallstackSerial: - { - int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->gpuZoneBegin.cpuTime, dt ); - break; - } - case QueueType::GpuZoneBeginAllocSrcLocSerial: - case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: - { - int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->gpuZoneBegin.cpuTime, dt ); - ptr = MemRead( &item->gpuZoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::GpuZoneEndSerial: - { - int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->gpuZoneEnd.cpuTime, dt ); - break; - } - case QueueType::GpuTime: - { - int64_t t = MemRead( 
&item->gpuTime.gpuTime ); - int64_t dt = t - refGpu; - refGpu = t; - MemWrite( &item->gpuTime.gpuTime, dt ); - break; - } - case QueueType::GpuContextName: - { - ptr = MemRead( &item->gpuContextNameFat.ptr ); - uint16_t size = MemRead( &item->gpuContextNameFat.size ); - SendSingleString( (const char*)ptr, size ); -#ifndef TRACY_ON_DEMAND - tracy_free_fast( (void*)ptr ); -#endif - break; - } -#ifdef TRACY_FIBERS - case QueueType::ZoneBegin: - case QueueType::ZoneBeginCallstack: - { - ThreadCtxCheckSerial( zoneBeginThread ); - int64_t t = MemRead( &item->zoneBegin.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneBegin.time, dt ); - break; - } - case QueueType::ZoneBeginAllocSrcLoc: - case QueueType::ZoneBeginAllocSrcLocCallstack: - { - ThreadCtxCheckSerial( zoneBeginThread ); - int64_t t = MemRead( &item->zoneBegin.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneBegin.time, dt ); - ptr = MemRead( &item->zoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::ZoneEnd: - { - ThreadCtxCheckSerial( zoneEndThread ); - int64_t t = MemRead( &item->zoneEnd.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneEnd.time, dt ); - break; - } - case QueueType::ZoneText: - case QueueType::ZoneName: - { - ThreadCtxCheckSerial( zoneTextFatThread ); - ptr = MemRead( &item->zoneTextFat.text ); - uint16_t size = MemRead( &item->zoneTextFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::Message: - case QueueType::MessageCallstack: - { - ThreadCtxCheckSerial( messageFatThread ); - ptr = MemRead( &item->messageFat.text ); - uint16_t size = MemRead( &item->messageFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::MessageColor: - case QueueType::MessageColorCallstack: - { - ThreadCtxCheckSerial( 
messageColorFatThread ); - ptr = MemRead( &item->messageColorFat.text ); - uint16_t size = MemRead( &item->messageColorFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::Callstack: - { - ThreadCtxCheckSerial( callstackFatThread ); - ptr = MemRead( &item->callstackFat.ptr ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::CallstackAlloc: - { - ThreadCtxCheckSerial( callstackAllocFatThread ); - ptr = MemRead( &item->callstackAllocFat.nativePtr ); - if( ptr != 0 ) - { - CutCallstack( (void*)ptr, "lua_pcall" ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - } - ptr = MemRead( &item->callstackAllocFat.ptr ); - SendCallstackAlloc( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::FiberEnter: - { - ThreadCtxCheckSerial( fiberEnter ); - int64_t t = MemRead( &item->fiberEnter.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->fiberEnter.time, dt ); - break; - } - case QueueType::FiberLeave: - { - ThreadCtxCheckSerial( fiberLeave ); - int64_t t = MemRead( &item->fiberLeave.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->fiberLeave.time, dt ); - break; - } -#endif - default: - assert( false ); - break; - } - } -#ifdef TRACY_FIBERS - else - { - switch( (QueueType)idx ) - { - case QueueType::ZoneColor: - { - ThreadCtxCheckSerial( zoneColorThread ); - break; - } - case QueueType::ZoneValue: - { - ThreadCtxCheckSerial( zoneValueThread ); - break; - } - case QueueType::ZoneValidation: - { - ThreadCtxCheckSerial( zoneValidationThread ); - break; - } - case QueueType::MessageLiteral: - case QueueType::MessageLiteralCallstack: - { - ThreadCtxCheckSerial( messageLiteralThread ); - break; - } - case QueueType::MessageLiteralColor: - case QueueType::MessageLiteralColorCallstack: - { - ThreadCtxCheckSerial( messageColorLiteralThread ); - break; - } - case QueueType::CrashReport: - { - 
ThreadCtxCheckSerial( crashReportThread ); - break; - } - default: - break; - } - } -#endif - if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost; - item++; - } - m_refTimeSerial = refSerial; - m_refTimeGpu = refGpu; -#ifdef TRACY_FIBERS - m_refTimeThread = refThread; -#endif - m_serialDequeue.clear(); - } - else - { - return DequeueStatus::QueueEmpty; - } - return DequeueStatus::DataDequeued; -} - -Profiler::ThreadCtxStatus Profiler::ThreadCtxCheck( uint32_t threadId ) -{ - if( m_threadCtx == threadId ) return ThreadCtxStatus::Same; - QueueItem item; - MemWrite( &item.hdr.type, QueueType::ThreadContext ); - MemWrite( &item.threadCtx.thread, threadId ); - if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return ThreadCtxStatus::ConnectionLost; - m_threadCtx = threadId; - m_refTimeThread = 0; - return ThreadCtxStatus::Changed; -} - -bool Profiler::CommitData() -{ - bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart ); - if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0; - m_bufferStart = m_bufferOffset; - return ret; -} - -char* Profiler::SafeCopyProlog( const char* data, size_t size ) -{ - bool success = true; - char* buf = m_safeSendBuffer; -#ifndef NDEBUG - assert( !m_inUse.exchange(true) ); -#endif - - if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); - -#if defined _WIN32 && defined _MSC_VER - __try - { - memcpy( buf, data, size ); - } - __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) - { - success = false; - } - -#elif defined _WIN32 && (defined __clang__ || defined __GNUC__) - // Send through the pipe to ensure safe reads on compilers with no __try/__except - for( size_t offset = 0; offset != size; /*in loop*/ ) - { - size_t sendsize = size - offset; - int result1, result2; - - // ENOSPC indicates that there is no more space to execute write operation - // other possible values: - // EBADF - invalid file descriptor or not opened for writing - // EINVAL - 
null buffer or odd number of bytes in unicode mode - while( ( result1 = _write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno != ENOSPC ) { /* retry */ } - if( result1 < 0 ) - { - success = false; - break; - } - - // EBADF - errno set to this value if pipe is not opened for reading or locked - // other possible values: - // EINVAL - result1 > INT_MAX - while( ( result2 = _read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno != EBADF ) { /* retry */ } - if( result2 != result1 ) - { - success = false; - break; - } - offset += result1; - } -#else - // Send through the pipe to ensure safe reads - for( size_t offset = 0; offset != size; /*in loop*/ ) - { - size_t sendsize = size - offset; - ssize_t result1, result2; - while( ( result1 = write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno == EINTR ) { /* retry */ } - if( result1 < 0 ) - { - success = false; - break; - } - while( ( result2 = read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno == EINTR ) { /* retry */ } - if( result2 != result1 ) - { - success = false; - break; - } - offset += result1; - } -#endif - - if( success ) return buf; - - SafeCopyEpilog( buf ); - return nullptr; -} - -void Profiler::SafeCopyEpilog( char* buf ) -{ - if( buf != m_safeSendBuffer ) tracy_free( buf ); - -#ifndef NDEBUG - m_inUse.store( false ); -#endif -} - -bool Profiler::SendData( const char* data, size_t len ) -{ - const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 ); - memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) ); - return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1; -} - -void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type ) -{ - assert( type == QueueType::StringData || - type == QueueType::ThreadName || - type == QueueType::PlotName || - type == QueueType::FrameName || - type == QueueType::ExternalName || - type == QueueType::ExternalThreadName || - type == QueueType::FiberName ); - - 
QueueItem item; - MemWrite( &item.hdr.type, type ); - MemWrite( &item.stringTransfer.ptr, str ); - - assert( len <= std::numeric_limits::max() ); - auto l16 = uint16_t( len ); - - NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)type] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - AppendDataUnsafe( ptr, l16 ); -} - -void Profiler::SendSingleString( const char* ptr, size_t len ) -{ - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SingleStringData ); - - assert( len <= std::numeric_limits::max() ); - auto l16 = uint16_t( len ); - - NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - AppendDataUnsafe( ptr, l16 ); -} - -void Profiler::SendSecondString( const char* ptr, size_t len ) -{ - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SecondStringData ); - - assert( len <= std::numeric_limits::max() ); - auto l16 = uint16_t( len ); - - NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - AppendDataUnsafe( ptr, l16 ); -} - -void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type ) -{ - assert( type == QueueType::FrameImageData || - type == QueueType::SymbolCode || - type == QueueType::SourceCode ); - - QueueItem item; - MemWrite( &item.hdr.type, type ); - MemWrite( &item.stringTransfer.ptr, str ); - - assert( len <= std::numeric_limits::max() ); - assert( QueueDataSize[(int)type] + sizeof( uint32_t ) + len <= TargetFrameSize ); - auto l32 = uint32_t( len ); - - NeedDataSize( QueueDataSize[(int)type] + sizeof( l32 ) + l32 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)type] ); - AppendDataUnsafe( &l32, sizeof( l32 ) ); - AppendDataUnsafe( ptr, 
l32 ); -} - -void Profiler::SendSourceLocation( uint64_t ptr ) -{ - auto srcloc = (const SourceLocationData*)ptr; - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SourceLocation ); - MemWrite( &item.srcloc.name, (uint64_t)srcloc->name ); - MemWrite( &item.srcloc.file, (uint64_t)srcloc->file ); - MemWrite( &item.srcloc.function, (uint64_t)srcloc->function ); - MemWrite( &item.srcloc.line, srcloc->line ); - MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color ) & 0xFF ) ); - MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) ); - MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) ); - AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] ); -} - -void Profiler::SendSourceLocationPayload( uint64_t _ptr ) -{ - auto ptr = (const char*)_ptr; - - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SourceLocationPayload ); - MemWrite( &item.stringTransfer.ptr, _ptr ); - - uint16_t len; - memcpy( &len, ptr, sizeof( len ) ); - assert( len > 2 ); - len -= 2; - ptr += 2; - - NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len ); - - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] ); - AppendDataUnsafe( &len, sizeof( len ) ); - AppendDataUnsafe( ptr, len ); -} - -void Profiler::SendCallstackPayload( uint64_t _ptr ) -{ - auto ptr = (uintptr_t*)_ptr; - - QueueItem item; - MemWrite( &item.hdr.type, QueueType::CallstackPayload ); - MemWrite( &item.stringTransfer.ptr, _ptr ); - - const auto sz = *ptr++; - const auto len = sz * sizeof( uint64_t ); - const auto l16 = uint16_t( len ); - - NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - - if( compile_time_condition::value ) - { - AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz ); - } - else - { - for( uintptr_t i=0; i> 63 != 0 ) - { - SendSingleString( "" ); 
- QueueItem item; - MemWrite( &item.hdr.type, QueueType::SymbolInformation ); - MemWrite( &item.symbolInformation.line, 0 ); - MemWrite( &item.symbolInformation.symAddr, symbol ); - AppendData( &item, QueueDataSize[(int)QueueType::SymbolInformation] ); - } - else - { - m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SymbolQuery, symbol } ); - } -#else - AckServerQuery(); -#endif -} - -void Profiler::QueueExternalName( uint64_t ptr ) -{ -#ifdef TRACY_HAS_SYSTEM_TRACING - m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::ExternalName, ptr } ); -#endif -} - -void Profiler::QueueKernelCode( uint64_t symbol, uint32_t size ) -{ - assert( symbol >> 63 != 0 ); -#ifdef TRACY_HAS_CALLSTACK - m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::KernelCode, symbol, size } ); -#else - AckSymbolCodeNotAvailable(); -#endif -} - -void Profiler::QueueSourceCodeQuery( uint32_t id ) -{ - assert( m_exectime != 0 ); - assert( m_queryData ); - m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SourceCode, uint64_t( m_queryData ), uint64_t( m_queryImage ), id } ); - m_queryData = nullptr; - m_queryImage = nullptr; -} - -#ifdef TRACY_HAS_CALLSTACK -void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) -{ - switch( si.type ) - { - case SymbolQueueItemType::CallstackFrame: - { - const auto frameData = DecodeCallstackPtr( si.ptr ); - auto data = tracy_malloc_fast( sizeof( CallstackEntry ) * frameData.size ); - memcpy( data, frameData.data, sizeof( CallstackEntry ) * frameData.size ); - TracyLfqPrepare( QueueType::CallstackFrameSize ); - MemWrite( &item->callstackFrameSizeFat.ptr, si.ptr ); - MemWrite( &item->callstackFrameSizeFat.size, frameData.size ); - MemWrite( &item->callstackFrameSizeFat.data, (uint64_t)data ); - MemWrite( &item->callstackFrameSizeFat.imageName, (uint64_t)frameData.imageName ); - TracyLfqCommit; - break; - } - case SymbolQueueItemType::SymbolQuery: - { -#ifdef __ANDROID__ - // On Android it's common for code to 
be in mappings that are only executable - // but not readable. - if( !EnsureReadable( si.ptr ) ) - { - TracyLfqPrepare( QueueType::AckServerQueryNoop ); - TracyLfqCommit; - break; - } -#endif - const auto sym = DecodeSymbolAddress( si.ptr ); - TracyLfqPrepare( QueueType::SymbolInformation ); - MemWrite( &item->symbolInformationFat.line, sym.line ); - MemWrite( &item->symbolInformationFat.symAddr, si.ptr ); - MemWrite( &item->symbolInformationFat.fileString, (uint64_t)sym.file ); - MemWrite( &item->symbolInformationFat.needFree, (uint8_t)sym.needFree ); - TracyLfqCommit; - break; - } -#ifdef TRACY_HAS_SYSTEM_TRACING - case SymbolQueueItemType::ExternalName: - { - const char* threadName; - const char* name; - SysTraceGetExternalName( si.ptr, threadName, name ); - TracyLfqPrepare( QueueType::ExternalNameMetadata ); - MemWrite( &item->externalNameMetadata.thread, si.ptr ); - MemWrite( &item->externalNameMetadata.name, (uint64_t)name ); - MemWrite( &item->externalNameMetadata.threadName, (uint64_t)threadName ); - TracyLfqCommit; - break; - } -#endif - case SymbolQueueItemType::KernelCode: - { -#ifdef _WIN32 - auto mod = GetKernelModulePath( si.ptr ); - if( mod ) - { - auto fn = DecodeCallstackPtrFast( si.ptr ); - if( *fn ) - { - auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); - if( hnd ) - { - auto ptr = (const void*)GetProcAddress( hnd, fn ); - if( ptr ) - { - auto buf = (char*)tracy_malloc( si.extra ); - memcpy( buf, ptr, si.extra ); - FreeLibrary( hnd ); - TracyLfqPrepare( QueueType::SymbolCodeMetadata ); - MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); - MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); - MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); - TracyLfqCommit; - break; - } - FreeLibrary( hnd ); - } - } - } -#elif defined __linux__ - void* data = m_kcore->Retrieve( si.ptr, si.extra ); - if( data ) - { - TracyLfqPrepare( QueueType::SymbolCodeMetadata ); - MemWrite( &item->symbolCodeMetadata.symbol, 
si.ptr ); - MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)data ); - MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); - TracyLfqCommit; - break; - } -#endif - TracyLfqPrepare( QueueType::AckSymbolCodeNotAvailable ); - TracyLfqCommit; - break; - } - case SymbolQueueItemType::SourceCode: - HandleSourceCodeQuery( (char*)si.ptr, (char*)si.extra, si.id ); - break; - default: - assert( false ); - break; - } -} - -void Profiler::SymbolWorker() -{ -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - s_symbolTid = syscall( SYS_gettid ); -#endif - - ThreadExitHandler threadExitHandler; - SetThreadName( "Tracy Symbol Worker" ); -#ifdef TRACY_USE_RPMALLOC - InitRpmalloc(); -#endif - InitCallstack(); - while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - - for(;;) - { - const auto shouldExit = ShouldExit(); -#ifdef TRACY_ON_DEMAND - if( !IsConnected() ) - { - if( shouldExit ) - { - s_symbolThreadGone.store( true, std::memory_order_release ); - return; - } - while( m_symbolQueue.front() ) m_symbolQueue.pop(); - std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); - continue; - } -#endif - auto si = m_symbolQueue.front(); - if( si ) - { - HandleSymbolQueueItem( *si ); - m_symbolQueue.pop(); - } - else - { - if( shouldExit ) - { - s_symbolThreadGone.store( true, std::memory_order_release ); - return; - } - std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); - } - } -} -#endif - -bool Profiler::HandleServerQuery() -{ - ServerQueryPacket payload; - if( !m_sock->Read( &payload, sizeof( payload ), 10 ) ) return false; - - uint8_t type; - uint64_t ptr; - memcpy( &type, &payload.type, sizeof( payload.type ) ); - memcpy( &ptr, &payload.ptr, sizeof( payload.ptr ) ); - - switch( type ) - { - case ServerQueryString: - SendString( ptr, (const char*)ptr, QueueType::StringData ); - break; - case ServerQueryThreadString: - if( ptr == m_mainThread ) - { - SendString( ptr, 
"Main thread", 11, QueueType::ThreadName ); - } - else - { - auto t = GetThreadNameData( (uint32_t)ptr ); - if( t ) - { - SendString( ptr, t->name, QueueType::ThreadName ); - if( t->groupHint != 0 ) - { - TracyLfqPrepare( QueueType::ThreadGroupHint ); - MemWrite( &item->threadGroupHint.thread, (uint32_t)ptr ); - MemWrite( &item->threadGroupHint.groupHint, t->groupHint ); - TracyLfqCommit; - } - } - else - { - SendString( ptr, GetThreadName( (uint32_t)ptr ), QueueType::ThreadName ); - } - } - break; - case ServerQuerySourceLocation: - SendSourceLocation( ptr ); - break; - case ServerQueryPlotName: - SendString( ptr, (const char*)ptr, QueueType::PlotName ); - break; - case ServerQueryTerminate: - return false; - case ServerQueryCallstackFrame: - QueueCallstackFrame( ptr ); - break; - case ServerQueryFrameName: - SendString( ptr, (const char*)ptr, QueueType::FrameName ); - break; - case ServerQueryDisconnect: - HandleDisconnect(); - return false; -#ifdef TRACY_HAS_SYSTEM_TRACING - case ServerQueryExternalName: - QueueExternalName( ptr ); - break; -#endif - case ServerQueryParameter: - HandleParameter( ptr ); - break; - case ServerQuerySymbol: - QueueSymbolQuery( ptr ); - break; -#ifndef TRACY_NO_CODE_TRANSFER - case ServerQuerySymbolCode: - HandleSymbolCodeQuery( ptr, payload.extra ); - break; -#endif - case ServerQuerySourceCode: - QueueSourceCodeQuery( uint32_t( ptr ) ); - break; - case ServerQueryDataTransfer: - if( m_queryData ) - { - assert( !m_queryImage ); - m_queryImage = m_queryData; - } - m_queryDataPtr = m_queryData = (char*)tracy_malloc( ptr + 11 ); - AckServerQuery(); - break; - case ServerQueryDataTransferPart: - memcpy( m_queryDataPtr, &ptr, 8 ); - memcpy( m_queryDataPtr+8, &payload.extra, 4 ); - m_queryDataPtr += 12; - AckServerQuery(); - break; -#ifdef TRACY_FIBERS - case ServerQueryFiberName: - SendString( ptr, (const char*)ptr, QueueType::FiberName ); - break; -#endif - default: - assert( false ); - break; - } - - return true; -} - -void 
Profiler::HandleDisconnect() -{ - moodycamel::ConsumerToken token( GetQueue() ); - -#ifdef TRACY_HAS_SYSTEM_TRACING - if( s_sysTraceThread ) - { - auto timestamp = GetTime(); - for(;;) - { - const auto status = DequeueContextSwitches( token, timestamp ); - if( status == DequeueStatus::ConnectionLost ) - { - return; - } - else if( status == DequeueStatus::QueueEmpty ) - { - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - } - if( timestamp < 0 ) - { - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - break; - } - ClearSerial(); - if( m_sock->HasData() ) - { - while( m_sock->HasData() ) - { - if( !HandleServerQuery() ) return; - } - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - } - else - { - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - } - } - } -#endif - - QueueItem terminate; - MemWrite( &terminate.hdr.type, QueueType::Terminate ); - if( !SendData( (const char*)&terminate, 1 ) ) return; - for(;;) - { - ClearQueues( token ); - if( m_sock->HasData() ) - { - while( m_sock->HasData() ) - { - if( !HandleServerQuery() ) return; - } - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - } - else - { - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - } - } -} - -void Profiler::CalibrateTimer() -{ - m_timerMul = 1.; - -#ifdef TRACY_HW_TIMER - -# if !defined TRACY_TIMER_QPC && defined TRACY_TIMER_FALLBACK - const bool needCalibration = HardwareSupportsInvariantTSC(); -# else - const bool needCalibration = true; -# endif - if( needCalibration ) - { - std::atomic_signal_fence( std::memory_order_acq_rel ); - const auto t0 = std::chrono::high_resolution_clock::now(); - const auto r0 = GetTime(); - std::atomic_signal_fence( std::memory_order_acq_rel ); - 
std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) ); - std::atomic_signal_fence( std::memory_order_acq_rel ); - const auto t1 = std::chrono::high_resolution_clock::now(); - const auto r1 = GetTime(); - std::atomic_signal_fence( std::memory_order_acq_rel ); - - const auto dt = std::chrono::duration_cast( t1 - t0 ).count(); - const auto dr = r1 - r0; - - m_timerMul = double( dt ) / double( dr ); - } -#endif -} - -void Profiler::CalibrateDelay() -{ - constexpr int Iterations = 50000; - - auto mindiff = std::numeric_limits::max(); - for( int i=0; i 0 && dti < mindiff ) mindiff = dti; - } - m_resolution = mindiff; - -#ifdef TRACY_DELAYED_INIT - m_delay = m_resolution; -#else - constexpr int Events = Iterations * 2; // start + end - static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" ); - - static const tracy::SourceLocationData __tracy_source_location { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; - const auto t0 = GetTime(); - for( int i=0; izoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location ); - TracyLfqCommit; - } - { - TracyLfqPrepare( QueueType::ZoneEnd ); - MemWrite( &item->zoneEnd.time, GetTime() ); - TracyLfqCommit; - } - } - const auto t1 = GetTime(); - const auto dt = t1 - t0; - m_delay = dt / Events; - - moodycamel::ConsumerToken token( GetQueue() ); - int left = Events; - while( left != 0 ) - { - const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, [](QueueItem* item, size_t sz){} ); - assert( sz > 0 ); - left -= (int)sz; - } - assert( GetQueue().size_approx() == 0 ); -#endif -} - -void Profiler::ReportTopology() -{ -#ifndef TRACY_DELAYED_INIT - struct CpuData - { - uint32_t package; - uint32_t die; - uint32_t core; - uint32_t thread; - }; - -#if defined _WIN32 -# ifdef TRACY_UWP - t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = &::GetLogicalProcessorInformationEx; -# else - 
t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = (t_GetLogicalProcessorInformationEx)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetLogicalProcessorInformationEx" ); -# endif - if( !_GetLogicalProcessorInformationEx ) return; - - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* packageInfo = nullptr; - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* dieInfo = nullptr; - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* coreInfo = nullptr; - - DWORD psz = 0; - _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz ); - if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) - { - packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); - auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); - assert( res ); - } - else - { - psz = 0; - } - - DWORD dsz = 0; - _GetLogicalProcessorInformationEx( RelationProcessorDie, nullptr, &dsz ); - if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) - { - dieInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( dsz ); - auto res = _GetLogicalProcessorInformationEx( RelationProcessorDie, dieInfo, &dsz ); - assert( res ); - } - else - { - dsz = 0; - } - - DWORD csz = 0; - _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz ); - if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) - { - coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); - auto res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); - assert( res ); - } - else - { - csz = 0; - } - - SYSTEM_INFO sysinfo; - GetSystemInfo( &sysinfo ); - const uint32_t numcpus = sysinfo.dwNumberOfProcessors; - - auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); - memset( cpuData, 0, sizeof( CpuData ) * numcpus ); - for( uint32_t i=0; iRelationship == RelationProcessorPackage ); - // FIXME account for GroupCount - auto mask = ptr->Processor.GroupMask[0].Mask; - int core = 0; - while( mask != 0 ) - { - if( mask & 1 ) 
cpuData[core].package = idx; - core++; - mask >>= 1; - } - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); - idx++; - } - - idx = 0; - ptr = dieInfo; - while( (char*)ptr < ((char*)dieInfo) + dsz ) - { - assert( ptr->Relationship == RelationProcessorDie ); - // FIXME account for GroupCount - auto mask = ptr->Processor.GroupMask[0].Mask; - int core = 0; - while( mask != 0 ) - { - if( mask & 1 ) cpuData[core].die = idx; - core++; - mask >>= 1; - } - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); - idx++; - } - - idx = 0; - ptr = coreInfo; - while( (char*)ptr < ((char*)coreInfo) + csz ) - { - assert( ptr->Relationship == RelationProcessorCore ); - // FIXME account for GroupCount - auto mask = ptr->Processor.GroupMask[0].Mask; - int core = 0; - while( mask != 0 ) - { - if( mask & 1 ) cpuData[core].core = idx; - core++; - mask >>= 1; - } - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); - idx++; - } - - for( uint32_t i=0; icpuTopology.package, data.package ); - MemWrite( &item->cpuTopology.die, data.die ); - MemWrite( &item->cpuTopology.core, data.core ); - MemWrite( &item->cpuTopology.thread, data.thread ); - -#ifdef TRACY_ON_DEMAND - DeferItem( *item ); -#endif - - TracyLfqCommit; - } - - tracy_free( cpuData ); - tracy_free( coreInfo ); - tracy_free( packageInfo ); -#elif defined __linux__ - const int numcpus = std::thread::hardware_concurrency(); - auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); - memset( cpuData, 0, sizeof( CpuData ) * numcpus ); - - const char* basePath = "/sys/devices/system/cpu/cpu"; - for( int i=0; icpuTopology.package, data.package ); - MemWrite( &item->cpuTopology.die, data.die ); - MemWrite( &item->cpuTopology.core, data.core ); - MemWrite( &item->cpuTopology.thread, data.thread ); - -#ifdef TRACY_ON_DEMAND - DeferItem( *item ); -#endif - - TracyLfqCommit; - } - - tracy_free( cpuData ); -#endif -#endif -} - -void Profiler::SendCallstack( 
int32_t depth, const char* skipBefore ) -{ -#ifdef TRACY_HAS_CALLSTACK - auto ptr = Callstack( depth ); - CutCallstack( ptr, skipBefore ); - - TracyQueuePrepare( QueueType::Callstack ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - TracyQueueCommit( callstackFatThread ); -#endif -} - -void Profiler::CutCallstack( void* callstack, const char* skipBefore ) -{ -#ifdef TRACY_HAS_CALLSTACK - auto data = (uintptr_t*)callstack; - const auto sz = *data++; - uintptr_t i; - for( i=0; i 100000000 ) // 100 ms - { - auto sysTime = m_sysTime.Get(); - if( sysTime >= 0 ) - { - m_sysTimeLast = t; - - TracyLfqPrepare( QueueType::SysTimeReport ); - MemWrite( &item->sysTime.time, GetTime() ); - MemWrite( &item->sysTime.sysTime, sysTime ); - TracyLfqCommit; - } - } -} -#endif - -void Profiler::HandleParameter( uint64_t payload ) -{ - assert( m_paramCallback ); - const auto idx = uint32_t( payload >> 32 ); - const auto val = int32_t( payload & 0xFFFFFFFF ); - m_paramCallback( m_paramCallbackData, idx, val ); - AckServerQuery(); -} - -void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) -{ - if( symbol >> 63 != 0 ) - { - QueueKernelCode( symbol, size ); - } - else - { - auto&& lambda = [ this, symbol ]( const char* buf, size_t size ) { - SendLongString( symbol, buf, size, QueueType::SymbolCode ); - }; - - // 'symbol' may have come from a module that has since unloaded, perform a safe copy before sending - if( !WithSafeCopy( (const char*)symbol, size, lambda ) ) AckSymbolCodeNotAvailable(); - } -} - -void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) -{ - bool ok = false; - FILE* f = fopen( data, "rb" ); - if( f ) - { - struct stat st; - if( fstat( fileno( f ), &st ) == 0 && (uint64_t)st.st_mtime < m_exectime && st.st_size < ( TargetFrameSize - 16 ) ) - { - auto ptr = (char*)tracy_malloc_fast( st.st_size ); - auto rd = fread( ptr, 1, st.st_size, f ); - if( rd == (size_t)st.st_size ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata 
); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); - MemWrite( &item->sourceCodeMetadata.id, id ); - TracyLfqCommit; - ok = true; - } - else - { - tracy_free_fast( ptr ); - } - } - fclose( f ); - } - -#ifdef TRACY_DEBUGINFOD - else if( image && data[0] == '/' ) - { - size_t size; - auto buildid = GetBuildIdForImage( image, size ); - if( buildid ) - { - auto d = debuginfod_find_source( GetDebuginfodClient(), buildid, size, data, nullptr ); - TracyDebug( "DebugInfo source query: %s, fn: %s, image: %s\n", d >= 0 ? " ok " : "fail", data, image ); - if( d >= 0 ) - { - struct stat st; - fstat( d, &st ); - if( st.st_size < ( TargetFrameSize - 16 ) ) - { - lseek( d, 0, SEEK_SET ); - auto ptr = (char*)tracy_malloc_fast( st.st_size ); - auto rd = read( d, ptr, st.st_size ); - if( rd == (size_t)st.st_size ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata ); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); - MemWrite( &item->sourceCodeMetadata.id, id ); - TracyLfqCommit; - ok = true; - } - else - { - tracy_free_fast( ptr ); - } - } - close( d ); - } - } - } - else - { - TracyDebug( "DebugInfo invalid query fn: %s, image: %s\n", data, image ); - } -#endif - - if( !ok && m_sourceCallback ) - { - size_t sz; - char* ptr = m_sourceCallback( m_sourceCallbackData, data, sz ); - if( ptr ) - { - if( sz < ( TargetFrameSize - 16 ) ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata ); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)sz ); - MemWrite( &item->sourceCodeMetadata.id, id ); - TracyLfqCommit; - ok = true; - } - else - { - tracy_free_fast( ptr ); - } - } - } - - if( !ok ) - { - TracyLfqPrepare( QueueType::AckSourceCodeNotAvailable ); - MemWrite( &item->sourceCodeNotAvailable, id ); - TracyLfqCommit; - } - - tracy_free_fast( data ); - tracy_free_fast( image ); -} - 
-#if defined _WIN32 && defined TRACY_TIMER_QPC -int64_t Profiler::GetTimeQpc() -{ - LARGE_INTEGER t; - QueryPerformanceCounter( &t ); - return t.QuadPart; -} -#endif - -} - -#ifdef __cplusplus -extern "C" { -#endif - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) return ctx; - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneBegin ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); - } - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) return ctx; - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - auto zoneQueue = tracy::QueueType::ZoneBegin; - if( depth > 0 && tracy::has_callstack() ) - { - tracy::GetProfiler().SendCallstack( depth ); - zoneQueue = tracy::QueueType::ZoneBeginCallstack; - } - TracyQueuePrepareC( zoneQueue ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( 
&item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); - - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) - { - tracy::tracy_free( (void*)srcloc ); - return ctx; - } - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread ); - } - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) - { - tracy::tracy_free( (void*)srcloc ); - return ctx; - } - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - auto zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLoc; - if( depth > 0 && tracy::has_callstack() ) - { - tracy::GetProfiler().SendCallstack( depth ); - zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLocCallstack; - } - TracyQueuePrepareC( zoneQueue ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread 
); - - return ctx; -} - -TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ) -{ - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneEnd ); - tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() ); - TracyQueueCommitC( zoneEndThread ); - } -} - -TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ) -{ - assert( size < std::numeric_limits::max() ); - if( !ctx.active ) return; - auto ptr = (char*)tracy::tracy_malloc( size ); - memcpy( ptr, txt, size ); -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneText ); - tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommitC( zoneTextFatThread ); - } -} - -TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ) -{ - assert( size < std::numeric_limits::max() ); - if( !ctx.active ) return; - auto ptr = (char*)tracy::tracy_malloc( size ); - memcpy( ptr, txt, size ); -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneName ); - tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommitC( zoneTextFatThread ); - } -} - -TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ) { - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( 
tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneColor ); - tracy::MemWrite( &item->zoneColor.b, uint8_t( ( color ) & 0xFF ) ); - tracy::MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - tracy::MemWrite( &item->zoneColor.r, uint8_t( ( color >> 16 ) & 0xFF ) ); - TracyQueueCommitC( zoneColorThread ); - } -} - -TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ) -{ - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneValue ); - tracy::MemWrite( &item->zoneValue.value, value ); - TracyQueueCommitC( zoneValueThread ); - } -} - -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); - } - else - { - tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); - } -} -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); - } - else - { - tracy::Profiler::MemFree( ptr, secure != 0 ); - } -} -TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ) { tracy::Profiler::MemDiscard( name, secure != 0 ); } -TRACY_API void 
___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemDiscardCallstack( name, secure != 0, depth ); - } - else - { - tracy::Profiler::MemDiscard( name, secure != 0 ); - } -} -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); - } - else - { - tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); - } -} -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); - } - else - { - tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); - } -} -TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } -TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } -TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip != 0 ); } -TRACY_API void ___tracy_emit_plot( const char* name, double val ) { 
tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step != 0, fill != 0, color ); } -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ) { tracy::Profiler::Message( txt, size, callstack_depth ); } -TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ) { tracy::Profiler::Message( txt, callstack_depth ); } -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, size, color, callstack_depth ); } -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, color, callstack_depth ); } -TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } - -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, color ); -} - -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneBegin ); - 
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - tracy::GetProfiler().SendCallstack( data.depth ); - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginCallstack ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLoc ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - tracy::GetProfiler().SendCallstack( data.depth ); - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLocCallstack ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( 
&item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuTime ); - tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); - tracy::MemWrite( &item->gpuTime.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneEnd ); - tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); - memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); - tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_new_context( ___tracy_gpu_new_context_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuNewContext ); - tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuNewContext.period, data.period ); - tracy::MemWrite( &item->gpuNewContext.context, data.context ); - tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); - tracy::MemWrite( &item->gpuNewContext.type, data.type ); - -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data data ) -{ - auto ptr = (char*)tracy::tracy_malloc( data.len ); - memcpy( ptr, data.name, data.len ); - - TracyLfqPrepareC( tracy::QueueType::GpuContextName ); - tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); - tracy::MemWrite( &item->gpuContextNameFat.ptr, 
(uint64_t)ptr ); - tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); - -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuCalibration ); - tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); - tracy::MemWrite( &item->gpuCalibration.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuTimeSync ); - tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTimeSync.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginCallstackSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, 
tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime ); - tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); - 
tracy::MemWrite( &item->gpuTime.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial ); - tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); - memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); - tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_new_context_serial( ___tracy_gpu_new_context_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext ); - tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuNewContext.period, data.period ); - tracy::MemWrite( &item->gpuNewContext.context, data.context ); - tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); - tracy::MemWrite( &item->gpuNewContext.type, data.type ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data data ) -{ - auto ptr = (char*)tracy::tracy_malloc( data.len ); - memcpy( ptr, data.name, data.len ); - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName ); - tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); - tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct 
___tracy_gpu_calibration_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration ); - tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); - tracy::MemWrite( &item->gpuCalibration.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTimeSync ); - tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTimeSync.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -struct __tracy_lockable_context_data -{ - uint32_t m_id; -#ifdef TRACY_ON_DEMAND - std::atomic m_lockCount; - std::atomic m_active; -#endif -}; - -TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ) -{ - struct __tracy_lockable_context_data *lockdata = (__tracy_lockable_context_data*)tracy::tracy_malloc( sizeof( __tracy_lockable_context_data ) ); - lockdata->m_id =tracy:: GetLockCounter().fetch_add( 1, std::memory_order_relaxed ); -#ifdef TRACY_ON_DEMAND - new(&lockdata->m_lockCount) std::atomic( 0 ); - new(&lockdata->m_active) std::atomic( false ); -#endif - assert( lockdata->m_id != (std::numeric_limits::max)() ); - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockAnnounce ); - tracy::MemWrite( &item->lockAnnounce.id, lockdata->m_id ); - tracy::MemWrite( &item->lockAnnounce.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); - tracy::MemWrite( 
&item->lockAnnounce.type, tracy::LockType::Lockable ); -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - tracy::Profiler::QueueSerialFinish(); - - return lockdata; -} - -TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockTerminate ); - tracy::MemWrite( &item->lockTerminate.id, lockdata->m_id ); - tracy::MemWrite( &item->lockTerminate.time, tracy::Profiler::GetTime() ); -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - tracy::Profiler::QueueSerialFinish(); - -#ifdef TRACY_ON_DEMAND - lockdata->m_lockCount.~atomic(); - lockdata->m_active.~atomic(); -#endif - tracy::tracy_free((void*)lockdata); -} - -TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) -{ -#ifdef TRACY_ON_DEMAND - bool queue = false; - const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = lockdata->m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = tracy::GetProfiler().IsConnected(); - if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return static_cast(false); -#endif - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockWait ); - tracy::MemWrite( &item->lockWait.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->lockWait.id, lockdata->m_id ); - tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() ); - tracy::Profiler::QueueSerialFinish(); - return static_cast(true); -} - -TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain 
); - tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); - tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) -{ -#ifdef TRACY_ON_DEMAND - lockdata->m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); - if( !lockdata->m_active.load( std::memory_order_relaxed ) ) return; - if( !tracy::GetProfiler().IsConnected() ) - { - lockdata->m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockRelease ); - tracy::MemWrite( &item->lockRelease.id, lockdata->m_id ); - tracy::MemWrite( &item->lockRelease.time, tracy::Profiler::GetTime() ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired ) -{ -#ifdef TRACY_ON_DEMAND - if( !acquired ) return; - - bool queue = false; - const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = lockdata->m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = tracy::GetProfiler().IsConnected(); - if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return; -#endif - - if( acquired ) - { - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); - tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); - tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); - tracy::Profiler::QueueSerialFinish(); - } -} - -TRACY_API void ___tracy_mark_lockable_ctx( 
struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ) -{ -#ifdef TRACY_ON_DEMAND - const auto active = lockdata->m_active.load( std::memory_order_relaxed ); - if( !active ) return; - const auto connected = tracy::GetProfiler().IsConnected(); - if( !connected ) - { - if( active ) lockdata->m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockMark ); - tracy::MemWrite( &item->lockMark.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->lockMark.id, lockdata->m_id ); - tracy::MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ) -{ - assert( nameSz < (std::numeric_limits::max)() ); - auto ptr = (char*)tracy::tracy_malloc( nameSz ); - memcpy( ptr, name, nameSz ); - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockName ); - tracy::MemWrite( &item->lockNameFat.id, lockdata->m_id ); - tracy::MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); - tracy::MemWrite( &item->lockNameFat.size, (uint16_t)nameSz ); -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API int32_t ___tracy_connected( void ) -{ - return static_cast( tracy::GetProfiler().IsConnected() ); -} - -#ifdef TRACY_FIBERS -TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber, 0 ); } -TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } -#endif - -# if defined TRACY_MANUAL_LIFETIME && defined TRACY_DELAYED_INIT -TRACY_API void ___tracy_startup_profiler( void ) -{ - tracy::StartupProfiler(); -} - -TRACY_API void ___tracy_shutdown_profiler( void ) -{ - 
tracy::ShutdownProfiler(); -} - -TRACY_API int32_t ___tracy_profiler_started( void ) -{ - return static_cast( tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ) ); -} -# endif - -#ifdef __cplusplus -} -#endif - -#endif +#ifdef TRACY_ENABLE + +#ifdef _WIN32 +# ifndef NOMINMAX +# define NOMINMAX +# endif +# include +# include +# include +# include +# include +# include "../common/TracyUwp.hpp" +# if defined __GNUC__ +# include +# endif +#else +# include +# include +#endif + +#ifdef _GNU_SOURCE +# include +#endif + +#ifdef __linux__ +# include +# include +# include +# include +#endif + +#if defined __APPLE__ || defined BSD +# include +# include +#endif + +#if defined __APPLE__ +# include "TargetConditionals.h" +# include +#endif + +#ifdef __ANDROID__ +# include +# include +# include +# include +# include +# include +#endif + +#ifdef __QNX__ +# include +# include +# include +# include +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../common/TracyAlign.hpp" +#include "../common/TracyAlloc.hpp" +#include "../common/TracySocket.hpp" +#include "../common/TracySystem.hpp" +#include "../common/TracyYield.hpp" +#include "../common/tracy_lz4.hpp" +#include "tracy_rpmalloc.hpp" +#include "TracyCallstack.hpp" +#include "TracyDebug.hpp" +#include "TracyDxt1.hpp" +#include "TracyScoped.hpp" +#include "TracyProfiler.hpp" +#include "TracyThread.hpp" +#include "TracyArmCpuTable.hpp" +#include "TracySysTrace.hpp" +#include "../tracy/TracyC.h" + +#if defined TRACY_MANUAL_LIFETIME && !defined(TRACY_DELAYED_INIT) +# error "TRACY_MANUAL_LIFETIME requires enabled TRACY_DELAYED_INIT" +#endif + +#ifdef TRACY_PORT +# ifndef TRACY_DATA_PORT +# define TRACY_DATA_PORT TRACY_PORT +# endif +# ifndef TRACY_BROADCAST_PORT +# define TRACY_BROADCAST_PORT TRACY_PORT +# endif +#endif + +#ifdef __APPLE__ +# ifndef TRACY_DELAYED_INIT +# define TRACY_DELAYED_INIT +# endif +#else +# if defined __GNUC__ +# define 
init_order( val ) __attribute__ ((init_priority(val))) +# else +# define init_order(x) +# endif +#endif + +#if defined _WIN32 +# include +extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); +extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); +extern "C" typedef char* (WINAPI *t_WineGetVersion)(); +extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); + +# if defined __GNUC__ + // _WIN32 +# include +#endif + +#else +# include +# include +# include +#endif +#if defined __linux__ +# include +# include +#endif + +#if !defined _WIN32 && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) +# include "TracyCpuid.hpp" +#endif + +#if !( ( defined _WIN32 && _WIN32_WINNT >= _WIN32_WINNT_VISTA ) || defined __linux__ ) +# include +#endif + +#ifdef __QNX__ +extern char* __progname; +#endif + +namespace tracy +{ + +#ifdef __ANDROID__ +// Implementation helpers of EnsureReadable(address). +// This is so far only needed on Android, where it is common for libraries to be mapped +// with only executable, not readable, permissions. Typical example (line from /proc/self/maps): +/* +746b63b000-746b6dc000 --xp 00042000 07:48 35 /apex/com.android.runtime/lib64/bionic/libc.so +*/ +// See https://github.com/wolfpld/tracy/issues/125 . +// To work around this, we parse /proc/self/maps and we use mprotect to set read permissions +// on any mappings that contain symbols addresses hit by HandleSymbolCodeQuery. + +namespace { +// Holds some information about a single memory mapping. +struct MappingInfo { + // Start of address range. Inclusive. + uintptr_t start_address; + // End of address range. Exclusive, so the mapping is the half-open interval + // [start, end) and its length in bytes is `end - start`. As in /proc/self/maps. + uintptr_t end_address; + // Read/Write/Executable permissions. 
+ bool perm_r, perm_w, perm_x; +}; +} // anonymous namespace + + // Internal implementation helper for LookUpMapping(address). + // + // Parses /proc/self/maps returning a vector. + // /proc/self/maps is assumed to be sorted by ascending address, so the resulting + // vector is sorted by ascending address too. +static std::vector ParseMappings() +{ + std::vector result; + FILE* file = fopen( "/proc/self/maps", "r" ); + if( !file ) return result; + char line[1024]; + while( fgets( line, sizeof( line ), file ) ) + { + uintptr_t start_addr; + uintptr_t end_addr; +#if defined(__LP64__) + if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue; +#else + if (sscanf( line, "%dx-%dx", &start_addr, &end_addr ) != 2 ) continue; +#endif + char* first_space = strchr( line, ' ' ); + if( !first_space ) continue; + char* perm = first_space + 1; + char* second_space = strchr( perm, ' ' ); + if( !second_space || second_space - perm != 4 ) continue; + result.emplace_back(); + auto& mapping = result.back(); + mapping.start_address = start_addr; + mapping.end_address = end_addr; + mapping.perm_r = perm[0] == 'r'; + mapping.perm_w = perm[1] == 'w'; + mapping.perm_x = perm[2] == 'x'; + } + fclose( file ); + return result; +} + +// Internal implementation helper for LookUpMapping(address). +// +// Takes as input an `address` and a known vector `mappings`, assumed to be +// sorted by increasing addresses, as /proc/self/maps seems to be. +// Returns a pointer to the MappingInfo describing the mapping that this +// address belongs to, or nullptr if the address isn't in `mappings`. +static MappingInfo* LookUpMapping(std::vector& mappings, uintptr_t address) +{ + // Comparison function for std::lower_bound. Returns true if all addresses in `m1` + // are lower than `addr`. + auto Compare = []( const MappingInfo& m1, uintptr_t addr ) { + // '<=' because the address ranges are half-open intervals, [start, end). 
+ return m1.end_address <= addr; + }; + auto iter = std::lower_bound( mappings.begin(), mappings.end(), address, Compare ); + if( iter == mappings.end() || iter->start_address > address) { + return nullptr; + } + return &*iter; +} + +// Internal implementation helper for EnsureReadable(address). +// +// Takes as input an `address` and returns a pointer to a MappingInfo +// describing the mapping that this address belongs to, or nullptr if +// the address isn't in any known mapping. +// +// This function is stateful and not reentrant (assumes to be called from +// only one thread). It holds a vector of mappings parsed from /proc/self/maps. +// +// Attempts to react to mappings changes by re-parsing /proc/self/maps. +static MappingInfo* LookUpMapping(uintptr_t address) +{ + // Static state managed by this function. Not constant, we mutate that state as + // we turn some mappings readable. Initially parsed once here, updated as needed below. + static std::vector s_mappings = ParseMappings(); + MappingInfo* mapping = LookUpMapping( s_mappings, address ); + if( mapping ) return mapping; + + // This address isn't in any known mapping. Try parsing again, maybe + // mappings changed. + s_mappings = ParseMappings(); + return LookUpMapping( s_mappings, address ); +} + +// Internal implementation helper for EnsureReadable(address). +// +// Attempts to make the specified `mapping` readable if it isn't already. +// Returns true if and only if the mapping is readable. +static bool EnsureReadable( MappingInfo& mapping ) +{ + if( mapping.perm_r ) + { + // The mapping is already readable. + return true; + } + int prot = PROT_READ; + if( mapping.perm_w ) prot |= PROT_WRITE; + if( mapping.perm_x ) prot |= PROT_EXEC; + if( mprotect( reinterpret_cast( mapping.start_address ), + mapping.end_address - mapping.start_address, prot ) == -1 ) + { + // Failed to make the mapping readable. Shouldn't happen, hasn't + // been observed yet. 
If it happened in practice, we should consider + // adding a bool to MappingInfo to track this to avoid retrying mprotect + // everytime on such mappings. + return false; + } + // The mapping is now readable. Update `mapping` so the next call will be fast. + mapping.perm_r = true; + return true; +} + +// Attempts to set the read permission on the entire mapping containing the +// specified address. Returns true if and only if the mapping is now readable. +static bool EnsureReadable( uintptr_t address ) +{ + MappingInfo* mapping = LookUpMapping(address); + return mapping && EnsureReadable( *mapping ); +} +#elif defined WIN32 +static bool EnsureReadable( uintptr_t address ) +{ + MEMORY_BASIC_INFORMATION memInfo; + VirtualQuery( reinterpret_cast( address ), &memInfo, sizeof( memInfo ) ); + return memInfo.Protect != PAGE_NOACCESS; +} +#else +static bool EnsureReadable( uintptr_t address ) +{ + return true; +} +#endif + +#if defined __linux__ + bool +#endif + +#ifndef TRACY_DELAYED_INIT + +struct InitTimeWrapper +{ + int64_t val; +}; + +struct ProducerWrapper +{ + tracy::moodycamel::ConcurrentQueue::ExplicitProducer* ptr; +}; + +struct ThreadHandleWrapper +{ + uint32_t val; +}; +#endif + + +#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 +static inline void CpuId( uint32_t* regs, uint32_t leaf ) +{ + memset(regs, 0, sizeof(uint32_t) * 4); +#if defined _MSC_VER + __cpuidex( (int*)regs, leaf, 0 ); +#else + __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 ); +#endif +} + +static void InitFailure( const char* msg ) +{ +#if defined _WIN32 + bool hasConsole = false; + bool reopen = false; + const auto attached = AttachConsole( ATTACH_PARENT_PROCESS ); + if( attached ) + { + hasConsole = true; + reopen = true; + } + else + { + const auto err = GetLastError(); + if( err == ERROR_ACCESS_DENIED ) + { + hasConsole = true; + } + } + if( hasConsole ) + { + fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); + if( reopen ) + { + 
freopen( "CONOUT$", "w", stderr ); + fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); + } + } + else + { +# ifndef TRACY_UWP + MessageBoxA( nullptr, msg, "Tracy Profiler initialization failure", MB_ICONSTOP ); +# endif + } +#else + fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); +#endif + exit( 1 ); +} + +static bool CheckHardwareSupportsInvariantTSC() +{ + const char* noCheck = GetEnvVar( "TRACY_NO_INVARIANT_CHECK" ); + if( noCheck && noCheck[0] == '1' ) return true; + + uint32_t regs[4]; + CpuId( regs, 1 ); + if( !( regs[3] & ( 1 << 4 ) ) ) + { +#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK + InitFailure( "CPU doesn't support RDTSC instruction." ); +#else + return false; +#endif + } + CpuId( regs, 0x80000007 ); + if( regs[3] & ( 1 << 8 ) ) return true; + + return false; +} + +#if defined TRACY_TIMER_FALLBACK && defined TRACY_HW_TIMER +bool HardwareSupportsInvariantTSC() +{ + static bool cachedResult = CheckHardwareSupportsInvariantTSC(); + return cachedResult; +} +#endif + +static int64_t SetupHwTimer() +{ +#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK + if( !CheckHardwareSupportsInvariantTSC() ) + { +#if defined _WIN32 + InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC or TRACY_TIMER_FALLBACK define to use lower resolution timer." ); +#else + InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_FALLBACK define to use lower resolution timer." 
); +#endif + } +#endif + + return Profiler::GetTime(); +} +#else +static int64_t SetupHwTimer() +{ + return Profiler::GetTime(); +} +#endif + +static const char* GetProcessName() +{ + const char* processName = "unknown"; +#ifdef _WIN32 + static char buf[_MAX_PATH]; + GetModuleFileNameA( nullptr, buf, _MAX_PATH ); + const char* ptr = buf; + while( *ptr != '\0' ) ptr++; + while( ptr > buf && *ptr != '\\' && *ptr != '/' ) ptr--; + if( ptr > buf ) ptr++; + processName = ptr; +#elif defined __ANDROID__ +# if __ANDROID_API__ >= 21 + auto buf = getprogname(); + if( buf ) processName = buf; +# endif +#elif defined __linux__ && defined _GNU_SOURCE + if( program_invocation_short_name ) processName = program_invocation_short_name; +#elif defined __APPLE__ || defined BSD + auto buf = getprogname(); + if( buf ) processName = buf; +#elif defined __QNX__ + processName = __progname; +#endif + return processName; +} + +static const char* GetProcessExecutablePath() +{ +#ifdef _WIN32 + static char buf[_MAX_PATH]; + GetModuleFileNameA( nullptr, buf, _MAX_PATH ); + return buf; +#elif defined __ANDROID__ + return nullptr; +#elif defined __linux__ && defined _GNU_SOURCE + return program_invocation_name; +#elif defined __APPLE__ + static char buf[1024]; + uint32_t size = 1024; + _NSGetExecutablePath( buf, &size ); + return buf; +#elif defined __DragonFly__ + static char buf[1024]; + readlink( "/proc/curproc/file", buf, 1024 ); + return buf; +#elif defined __FreeBSD__ + static char buf[1024]; + int mib[4]; + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PATHNAME; + mib[3] = -1; + size_t cb = 1024; + sysctl( mib, 4, buf, &cb, nullptr, 0 ); + return buf; +#elif defined __NetBSD__ + static char buf[1024]; + readlink( "/proc/curproc/exe", buf, 1024 ); + return buf; +#elif defined __QNX__ + static char buf[_PC_PATH_MAX + 1]; + _cmdname(buf); + return buf; +#else + return nullptr; +#endif +} + +#if defined __linux__ && defined __ARM_ARCH +static uint32_t GetHex( char*& ptr, int 
skip ) +{ + uint32_t ret; + ptr += skip; + char* end; + if( ptr[0] == '0' && ptr[1] == 'x' ) + { + ptr += 2; + ret = strtol( ptr, &end, 16 ); + } + else + { + ret = strtol( ptr, &end, 10 ); + } + ptr = end; + return ret; +} +#endif + +static const char* GetHostInfo() +{ + static char buf[1024]; + auto ptr = buf; +#if defined _WIN32 +# ifdef TRACY_UWP + auto GetVersion = &::GetVersionEx; +# else + auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" ); +# endif + if( !GetVersion ) + { +# ifdef __MINGW32__ + ptr += sprintf( ptr, "OS: Windows (MingW)\n" ); +# else + ptr += sprintf( ptr, "OS: Windows\n" ); +# endif + } + else + { + RTL_OSVERSIONINFOW ver = { sizeof( RTL_OSVERSIONINFOW ) }; + GetVersion( &ver ); + +# ifdef __MINGW32__ + ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); +# else + auto WineGetVersion = (t_WineGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_version" ); + auto WineGetBuildId = (t_WineGetBuildId)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_build_id" ); + if( WineGetVersion && WineGetBuildId ) + { + ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu (Wine %s [%s])\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber, WineGetVersion(), WineGetBuildId() ); + } + else + { + ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); + } +# endif + } +#elif defined __linux__ + struct utsname utsName; + uname( &utsName ); +# if defined __ANDROID__ + ptr += sprintf( ptr, "OS: Linux %s (Android)\n", utsName.release ); +# else + ptr += sprintf( ptr, "OS: Linux %s\n", utsName.release ); +# endif +#elif defined __APPLE__ +# if TARGET_OS_IPHONE == 1 + ptr += sprintf( ptr, "OS: Darwin (iOS)\n" ); +# elif TARGET_OS_MAC == 1 + ptr += sprintf( ptr, "OS: Darwin (OSX)\n" ); +# else + ptr += sprintf( ptr, "OS: Darwin (unknown)\n" ); +# endif 
+#elif defined __DragonFly__ + ptr += sprintf( ptr, "OS: BSD (DragonFly)\n" ); +#elif defined __FreeBSD__ + ptr += sprintf( ptr, "OS: BSD (FreeBSD)\n" ); +#elif defined __NetBSD__ + ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" ); +#elif defined __OpenBSD__ + ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" ); +#elif defined __QNX__ + ptr += sprintf( ptr, "OS: QNX\n" ); +#else + ptr += sprintf( ptr, "OS: unknown\n" ); +#endif + +#if defined _MSC_VER +# if defined __clang__ + ptr += sprintf( ptr, "Compiler: MSVC clang-cl %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); +# else + ptr += sprintf( ptr, "Compiler: MSVC %i\n", _MSC_VER ); +# endif +#elif defined __clang__ + ptr += sprintf( ptr, "Compiler: clang %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); +#elif defined __GNUC__ + ptr += sprintf( ptr, "Compiler: gcc %i.%i.%i\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ ); +#else + ptr += sprintf( ptr, "Compiler: unknown\n" ); +#endif + +#if defined _WIN32 + InitWinSock(); + + char hostname[512]; + gethostname( hostname, 512 ); + +# ifdef TRACY_UWP + const char* user = ""; +# else + DWORD userSz = UNLEN+1; + char user[UNLEN+1]; + GetUserNameA( user, &userSz ); +# endif + + ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); +#else + char hostname[_POSIX_HOST_NAME_MAX]{}; + char user[_POSIX_LOGIN_NAME_MAX]{}; + + gethostname( hostname, _POSIX_HOST_NAME_MAX ); +# if defined __ANDROID__ + const auto login = getlogin(); + if( login ) + { + strcpy( user, login ); + } + else + { + memcpy( user, "(?)", 4 ); + } +# else + getlogin_r( user, _POSIX_LOGIN_NAME_MAX ); +# endif + + ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); +#endif + +#if defined __i386 || defined _M_IX86 + ptr += sprintf( ptr, "Arch: x86\n" ); +#elif defined __x86_64__ || defined _M_X64 + ptr += sprintf( ptr, "Arch: x64\n" ); +#elif defined __aarch64__ + ptr += sprintf( ptr, "Arch: ARM64\n" ); +#elif defined __ARM_ARCH + ptr += sprintf( ptr, "Arch: ARM\n" ); 
+#else + ptr += sprintf( ptr, "Arch: unknown\n" ); +#endif + +#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 + uint32_t regs[4]; + char cpuModel[4*4*3+1] = {}; + auto modelPtr = cpuModel; + for( uint32_t i=0x80000002; i<0x80000005; ++i ) + { + CpuId( regs, i ); + memcpy( modelPtr, regs, sizeof( regs ) ); modelPtr += sizeof( regs ); + } + + ptr += sprintf( ptr, "CPU: %s\n", cpuModel ); +#elif defined __linux__ && defined __ARM_ARCH + bool cpuFound = false; + FILE* fcpuinfo = fopen( "/proc/cpuinfo", "rb" ); + if( fcpuinfo ) + { + enum { BufSize = 4*1024 }; + char buf[BufSize]; + const auto sz = fread( buf, 1, BufSize, fcpuinfo ); + fclose( fcpuinfo ); + const auto end = buf + sz; + auto cptr = buf; + + uint32_t impl = 0; + uint32_t var = 0; + uint32_t part = 0; + uint32_t rev = 0; + + while( end - cptr > 20 ) + { + while( end - cptr > 20 && memcmp( cptr, "CPU ", 4 ) != 0 ) + { + cptr += 4; + while( end - cptr > 20 && *cptr != '\n' ) cptr++; + cptr++; + } + if( end - cptr <= 20 ) break; + cptr += 4; + if( memcmp( cptr, "implementer\t: ", 14 ) == 0 ) + { + if( impl != 0 ) break; + impl = GetHex( cptr, 14 ); + } + else if( memcmp( cptr, "variant\t: ", 10 ) == 0 ) var = GetHex( cptr, 10 ); + else if( memcmp( cptr, "part\t: ", 7 ) == 0 ) part = GetHex( cptr, 7 ); + else if( memcmp( cptr, "revision\t: ", 11 ) == 0 ) rev = GetHex( cptr, 11 ); + while( *cptr != '\n' && *cptr != '\0' ) cptr++; + cptr++; + } + + if( impl != 0 || var != 0 || part != 0 || rev != 0 ) + { + cpuFound = true; + ptr += sprintf( ptr, "CPU: %s%s r%ip%i\n", DecodeArmImplementer( impl ), DecodeArmPart( impl, part ), var, rev ); + } + } + if( !cpuFound ) + { + ptr += sprintf( ptr, "CPU: unknown\n" ); + } +#elif defined __APPLE__ && TARGET_OS_IPHONE == 1 + { + size_t sz; + sysctlbyname( "hw.machine", nullptr, &sz, nullptr, 0 ); + auto str = (char*)tracy_malloc( sz ); + sysctlbyname( "hw.machine", str, &sz, nullptr, 0 ); + ptr += sprintf( ptr, "Device: %s\n", DecodeIosDevice( 
str ) ); + tracy_free( str ); + } +#else + ptr += sprintf( ptr, "CPU: unknown\n" ); +#endif +#ifdef __ANDROID__ + char deviceModel[PROP_VALUE_MAX+1]; + char deviceManufacturer[PROP_VALUE_MAX+1]; + __system_property_get( "ro.product.model", deviceModel ); + __system_property_get( "ro.product.manufacturer", deviceManufacturer ); + ptr += sprintf( ptr, "Device: %s %s\n", deviceManufacturer, deviceModel ); +#endif + + ptr += sprintf( ptr, "CPU cores: %i\n", std::thread::hardware_concurrency() ); + +#if defined _WIN32 + MEMORYSTATUSEX statex; + statex.dwLength = sizeof( statex ); + GlobalMemoryStatusEx( &statex ); +# ifdef _MSC_VER + ptr += sprintf( ptr, "RAM: %I64u MB\n", statex.ullTotalPhys / 1024 / 1024 ); +# else + ptr += sprintf( ptr, "RAM: %llu MB\n", statex.ullTotalPhys / 1024 / 1024 ); +# endif +#elif defined __linux__ + struct sysinfo sysInfo; + sysinfo( &sysInfo ); + ptr += sprintf( ptr, "RAM: %lu MB\n", sysInfo.totalram / 1024 / 1024 ); +#elif defined __APPLE__ + size_t memSize; + size_t sz = sizeof( memSize ); + sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 ); + ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); +#elif defined BSD + size_t memSize; + size_t sz = sizeof( memSize ); + sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ); + ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); +#elif defined __QNX__ + struct asinfo_entry *entries = SYSPAGE_ENTRY(asinfo); + size_t count = SYSPAGE_ENTRY_SIZE(asinfo) / sizeof(struct asinfo_entry); + char *strings = SYSPAGE_ENTRY(strings)->data; + + uint64_t memSize = 0; + size_t i; + for (i = 0; i < count; i++) { + struct asinfo_entry *entry = &entries[i]; + if (strcmp(strings + entry->name, "ram") == 0) { + memSize += entry->end - entry->start + 1; + } + } + memSize = memSize / 1024 / 1024; + ptr += sprintf( ptr, "RAM: %llu MB\n", memSize); +#else + ptr += sprintf( ptr, "RAM: unknown\n" ); +#endif + + return buf; +} + +static uint64_t GetPid() +{ +#if defined _WIN32 + return 
uint64_t( GetCurrentProcessId() ); +#else + return uint64_t( getpid() ); +#endif +} + +void Profiler::AckServerQuery() +{ + QueueItem item; + MemWrite( &item.hdr.type, QueueType::AckServerQueryNoop ); + NeedDataSize( QueueDataSize[(int)QueueType::AckServerQueryNoop] ); + AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckServerQueryNoop] ); +} + +void Profiler::AckSymbolCodeNotAvailable() +{ + QueueItem item; + MemWrite( &item.hdr.type, QueueType::AckSymbolCodeNotAvailable ); + NeedDataSize( QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); + AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); +} + +static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, int& len, int port ) +{ + static BroadcastMessage msg; + + msg.broadcastVersion = BroadcastVersion; + msg.protocolVersion = ProtocolVersion; + msg.listenPort = port; + msg.pid = GetPid(); + + memcpy( msg.programName, procname, pnsz ); + memset( msg.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); + + len = int( offsetof( BroadcastMessage, programName ) + pnsz + 1 ); + return msg; +} + +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER +static DWORD s_profilerThreadId = 0; +static DWORD s_symbolThreadId = 0; +static char s_crashText[1024]; + +LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) +{ + if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH; + + const unsigned ec = pExp->ExceptionRecord->ExceptionCode; + auto msgPtr = s_crashText; + switch( ec ) + { + case EXCEPTION_ACCESS_VIOLATION: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ACCESS_VIOLATION (0x%x). 
", ec ); + switch( pExp->ExceptionRecord->ExceptionInformation[0] ) + { + case 0: + msgPtr += sprintf( msgPtr, "Read violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); + break; + case 1: + msgPtr += sprintf( msgPtr, "Write violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); + break; + case 8: + msgPtr += sprintf( msgPtr, "DEP violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); + break; + default: + break; + } + break; + case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0x%x). ", ec ); + break; + case EXCEPTION_DATATYPE_MISALIGNMENT: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_DATATYPE_MISALIGNMENT (0x%x). ", ec ); + break; + case EXCEPTION_FLT_DIVIDE_BY_ZERO: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO (0x%x). ", ec ); + break; + case EXCEPTION_ILLEGAL_INSTRUCTION: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ILLEGAL_INSTRUCTION (0x%x). ", ec ); + break; + case EXCEPTION_IN_PAGE_ERROR: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_IN_PAGE_ERROR (0x%x). ", ec ); + break; + case EXCEPTION_INT_DIVIDE_BY_ZERO: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO (0x%x). ", ec ); + break; + case EXCEPTION_PRIV_INSTRUCTION: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_PRIV_INSTRUCTION (0x%x). ", ec ); + break; + case EXCEPTION_STACK_OVERFLOW: + msgPtr += sprintf( msgPtr, "Exception EXCEPTION_STACK_OVERFLOW (0x%x). 
", ec ); + break; + default: + return EXCEPTION_CONTINUE_SEARCH; + } + + { + GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" ); + + TracyQueuePrepare( QueueType::CrashReport ); + item->crashReport.time = Profiler::GetTime(); + item->crashReport.text = (uint64_t)s_crashText; + TracyQueueCommit( crashReportThread ); + } + + HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); + if( h == INVALID_HANDLE_VALUE ) return EXCEPTION_CONTINUE_SEARCH; + + THREADENTRY32 te = { sizeof( te ) }; + if( !Thread32First( h, &te ) ) + { + CloseHandle( h ); + return EXCEPTION_CONTINUE_SEARCH; + } + + const auto pid = GetCurrentProcessId(); + const auto tid = GetCurrentThreadId(); + + do + { + if( te.th32OwnerProcessID == pid && te.th32ThreadID != tid && te.th32ThreadID != s_profilerThreadId && te.th32ThreadID != s_symbolThreadId ) + { + HANDLE th = OpenThread( THREAD_SUSPEND_RESUME, FALSE, te.th32ThreadID ); + if( th != INVALID_HANDLE_VALUE ) + { + SuspendThread( th ); + CloseHandle( th ); + } + } + } + while( Thread32Next( h, &te ) ); + CloseHandle( h ); + + { + TracyLfqPrepare( QueueType::Crash ); + TracyLfqCommit; + } + + std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); + GetProfiler().RequestShutdown(); + while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; + + return EXCEPTION_CONTINUE_SEARCH; +} +#endif + +static Profiler* s_instance = nullptr; +static Thread* s_thread; +#ifndef TRACY_NO_FRAME_IMAGE +static Thread* s_compressThread; +#endif +#ifdef TRACY_HAS_CALLSTACK +static Thread* s_symbolThread; +std::atomic s_symbolThreadGone { false }; +#endif +#ifdef TRACY_HAS_SYSTEM_TRACING +static Thread* s_sysTraceThread = nullptr; +#endif + +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER +# ifndef TRACY_CRASH_SIGNAL +# define TRACY_CRASH_SIGNAL SIGPWR +# endif + +static long s_profilerTid = 0; +static long s_symbolTid = 0; +static char s_crashText[1024]; +static std::atomic 
s_alreadyCrashed( false ); + +static void ThreadFreezer( int /*signal*/ ) +{ + for(;;) sleep( 1000 ); +} + +static inline void HexPrint( char*& ptr, uint64_t val ) +{ + if( val == 0 ) + { + *ptr++ = '0'; + return; + } + + static const char HexTable[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; + char buf[16]; + auto bptr = buf; + + do + { + *bptr++ = HexTable[val%16]; + val /= 16; + } + while( val > 0 ); + + do + { + *ptr++ = *--bptr; + } + while( bptr != buf ); +} + +static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) +{ + bool expected = false; + if( !s_alreadyCrashed.compare_exchange_strong( expected, true ) ) ThreadFreezer( signal ); + + struct sigaction act = {}; + act.sa_handler = SIG_DFL; + sigaction( SIGABRT, &act, nullptr ); + + auto msgPtr = s_crashText; + switch( signal ) + { + case SIGILL: + strcpy( msgPtr, "Illegal Instruction.\n" ); + while( *msgPtr ) msgPtr++; + switch( info->si_code ) + { + case ILL_ILLOPC: + strcpy( msgPtr, "Illegal opcode.\n" ); + break; + case ILL_ILLOPN: + strcpy( msgPtr, "Illegal operand.\n" ); + break; + case ILL_ILLADR: + strcpy( msgPtr, "Illegal addressing mode.\n" ); + break; + case ILL_ILLTRP: + strcpy( msgPtr, "Illegal trap.\n" ); + break; + case ILL_PRVOPC: + strcpy( msgPtr, "Privileged opcode.\n" ); + break; + case ILL_PRVREG: + strcpy( msgPtr, "Privileged register.\n" ); + break; + case ILL_COPROC: + strcpy( msgPtr, "Coprocessor error.\n" ); + break; + case ILL_BADSTK: + strcpy( msgPtr, "Internal stack error.\n" ); + break; + default: + break; + } + break; + case SIGFPE: + strcpy( msgPtr, "Floating-point exception.\n" ); + while( *msgPtr ) msgPtr++; + switch( info->si_code ) + { + case FPE_INTDIV: + strcpy( msgPtr, "Integer divide by zero.\n" ); + break; + case FPE_INTOVF: + strcpy( msgPtr, "Integer overflow.\n" ); + break; + case FPE_FLTDIV: + strcpy( msgPtr, "Floating-point divide by zero.\n" ); + break; + case FPE_FLTOVF: + strcpy( msgPtr, 
"Floating-point overflow.\n" ); + break; + case FPE_FLTUND: + strcpy( msgPtr, "Floating-point underflow.\n" ); + break; + case FPE_FLTRES: + strcpy( msgPtr, "Floating-point inexact result.\n" ); + break; + case FPE_FLTINV: + strcpy( msgPtr, "Floating-point invalid operation.\n" ); + break; + case FPE_FLTSUB: + strcpy( msgPtr, "Subscript out of range.\n" ); + break; + default: + break; + } + break; + case SIGSEGV: + strcpy( msgPtr, "Invalid memory reference.\n" ); + while( *msgPtr ) msgPtr++; + switch( info->si_code ) + { + case SEGV_MAPERR: + strcpy( msgPtr, "Address not mapped to object.\n" ); + break; + case SEGV_ACCERR: + strcpy( msgPtr, "Invalid permissions for mapped object.\n" ); + break; +# ifdef SEGV_BNDERR + case SEGV_BNDERR: + strcpy( msgPtr, "Failed address bound checks.\n" ); + break; +# endif +# ifdef SEGV_PKUERR + case SEGV_PKUERR: + strcpy( msgPtr, "Access was denied by memory protection keys.\n" ); + break; +# endif + default: + break; + } + break; + case SIGPIPE: + strcpy( msgPtr, "Broken pipe.\n" ); + while( *msgPtr ) msgPtr++; + break; + case SIGBUS: + strcpy( msgPtr, "Bus error.\n" ); + while( *msgPtr ) msgPtr++; + switch( info->si_code ) + { + case BUS_ADRALN: + strcpy( msgPtr, "Invalid address alignment.\n" ); + break; + case BUS_ADRERR: + strcpy( msgPtr, "Nonexistent physical address.\n" ); + break; + case BUS_OBJERR: + strcpy( msgPtr, "Object-specific hardware error.\n" ); + break; +# ifdef BUS_MCEERR_AR + case BUS_MCEERR_AR: + strcpy( msgPtr, "Hardware memory error consumed on a machine check; action required.\n" ); + break; +# endif +# ifdef BUS_MCEERR_AO + case BUS_MCEERR_AO: + strcpy( msgPtr, "Hardware memory error detected in process but not consumed; action optional.\n" ); + break; +# endif + default: + break; + } + break; + case SIGABRT: + strcpy( msgPtr, "Abort signal from abort().\n" ); + break; + default: + abort(); + } + while( *msgPtr ) msgPtr++; + + if( signal != SIGPIPE ) + { + strcpy( msgPtr, "Fault address: 0x" ); + while( 
*msgPtr ) msgPtr++; + HexPrint( msgPtr, uint64_t( info->si_addr ) ); + *msgPtr++ = '\n'; + } + + { + GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" ); + + TracyQueuePrepare( QueueType::CrashReport ); + item->crashReport.time = Profiler::GetTime(); + item->crashReport.text = (uint64_t)s_crashText; + TracyQueueCommit( crashReportThread ); + } + + DIR* dp = opendir( "/proc/self/task" ); + if( !dp ) abort(); + + const auto selfTid = syscall( SYS_gettid ); + + struct dirent* ep; + while( ( ep = readdir( dp ) ) != nullptr ) + { + if( ep->d_name[0] == '.' ) continue; + int tid = atoi( ep->d_name ); + if( tid != selfTid && tid != s_profilerTid && tid != s_symbolTid ) + { + syscall( SYS_tkill, tid, TRACY_CRASH_SIGNAL ); + } + } + closedir( dp ); + +#ifdef TRACY_HAS_CALLSTACK + if( selfTid == s_symbolTid ) s_symbolThreadGone.store( true, std::memory_order_release ); +#endif + + TracyLfqPrepare( QueueType::Crash ); + TracyLfqCommit; + + std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); + GetProfiler().RequestShutdown(); + while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; + + abort(); +} +#endif + + +enum { QueuePrealloc = 256 * 1024 }; + +TRACY_API int64_t GetFrequencyQpc() +{ +#if defined _WIN32 + LARGE_INTEGER t; + QueryPerformanceFrequency( &t ); + return t.QuadPart; +#else + return 0; +#endif +} + +#ifdef TRACY_DELAYED_INIT +struct ThreadNameData; +TRACY_API moodycamel::ConcurrentQueue& GetQueue(); + +struct ProfilerData +{ + int64_t initTime = SetupHwTimer(); + moodycamel::ConcurrentQueue queue; + Profiler profiler; + std::atomic lockCounter { 0 }; + std::atomic gpuCtxCounter { 0 }; + std::atomic threadNameData { nullptr }; +}; + +struct ProducerWrapper +{ + ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {} + moodycamel::ProducerToken detail; + tracy::moodycamel::ConcurrentQueue::ExplicitProducer* ptr; +}; + +struct 
ProfilerThreadData +{ + ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {} + ProducerWrapper token; + GpuCtxWrapper gpuCtx; +# ifdef TRACY_ON_DEMAND + LuaZoneState luaZoneState; +# endif +}; + +std::atomic RpInitDone { 0 }; +std::atomic RpInitLock { 0 }; +thread_local bool RpThreadInitDone = false; +thread_local bool RpThreadShutdown = false; + +# ifdef TRACY_MANUAL_LIFETIME +ProfilerData* s_profilerData = nullptr; +static ProfilerThreadData& GetProfilerThreadData(); +static std::atomic s_isProfilerStarted { false }; +TRACY_API void StartupProfiler() +{ + s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); + new (s_profilerData) ProfilerData(); + s_profilerData->profiler.SpawnWorkerThreads(); + GetProfilerThreadData().token = ProducerWrapper( *s_profilerData ); + s_isProfilerStarted.store( true, std::memory_order_seq_cst ); +} +static ProfilerData& GetProfilerData() +{ + assert( s_profilerData ); + return *s_profilerData; +} +TRACY_API void ShutdownProfiler() +{ + s_isProfilerStarted.store( false, std::memory_order_seq_cst ); + s_profilerData->~ProfilerData(); + tracy_free( s_profilerData ); + s_profilerData = nullptr; + rpmalloc_finalize(); + RpThreadInitDone = false; + RpInitDone.store( 0, std::memory_order_release ); +} +TRACY_API bool IsProfilerStarted() +{ + return s_isProfilerStarted.load( std::memory_order_seq_cst ); +} +# else +static std::atomic profilerDataLock { 0 }; +static std::atomic profilerData { nullptr }; + +static ProfilerData& GetProfilerData() +{ + auto ptr = profilerData.load( std::memory_order_acquire ); + if( !ptr ) + { + int expected = 0; + while( !profilerDataLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } + ptr = profilerData.load( std::memory_order_acquire ); + if( !ptr ) + { + ptr = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); + new (ptr) ProfilerData(); + profilerData.store( ptr, 
std::memory_order_release ); + } + profilerDataLock.store( 0, std::memory_order_release ); + } + return *ptr; +} +# endif + +// GCC prior to 8.4 had a bug with function-inline thread_local variables. Versions of glibc beginning with +// 2.18 may attempt to work around this issue, which manifests as a crash while running static destructors +// if this function is compiled into a shared object. Unfortunately, centos7 ships with glibc 2.17. If running +// on old GCC, use the old-fashioned way as a workaround +// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85400 +#if !defined(__clang__) && defined(__GNUC__) && ((__GNUC__ < 8) || ((__GNUC__ == 8) && (__GNUC_MINOR__ < 4))) +struct ProfilerThreadDataKey +{ +public: + ProfilerThreadDataKey() + { + int val = pthread_key_create(&m_key, sDestructor); + static_cast(val); // unused + assert(val == 0); + } + ~ProfilerThreadDataKey() + { + int val = pthread_key_delete(m_key); + static_cast(val); // unused + assert(val == 0); + } + ProfilerThreadData& get() + { + void* p = pthread_getspecific(m_key); + if (!p) + { + p = (ProfilerThreadData*)tracy_malloc( sizeof( ProfilerThreadData ) ); + new (p) ProfilerThreadData(GetProfilerData()); + pthread_setspecific(m_key, p); + } + return *static_cast(p); + } +private: + pthread_key_t m_key; + + static void sDestructor(void* p) + { + ((ProfilerThreadData*)p)->~ProfilerThreadData(); + tracy_free(p); + } +}; + +static ProfilerThreadData& GetProfilerThreadData() +{ + static ProfilerThreadDataKey key; + return key.get(); +} +#else +static ProfilerThreadData& GetProfilerThreadData() +{ + thread_local ProfilerThreadData data( GetProfilerData() ); + return data; +} +#endif + +TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; } +TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; } +TRACY_API moodycamel::ConcurrentQueue& GetQueue() { return GetProfilerData().queue; } +TRACY_API int64_t GetInitTime() { return 
GetProfilerData().initTime; } +TRACY_API std::atomic& GetLockCounter() { return GetProfilerData().lockCounter; } +TRACY_API std::atomic& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; } +TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; } +TRACY_API uint32_t GetThreadHandle() { return detail::GetThreadHandleImpl(); } +std::atomic& GetThreadNameData() { return GetProfilerData().threadNameData; } + +# ifdef TRACY_ON_DEMAND +TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; } +# endif + +# ifndef TRACY_MANUAL_LIFETIME +namespace +{ + const auto& __profiler_init = GetProfiler(); +} +# endif + +#else + +// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this. + +// 1a. But s_queue is needed for initialization of variables in point 2. +extern moodycamel::ConcurrentQueue s_queue; + +// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread. +thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue ); +thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) }; +thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() }; + +# ifdef _MSC_VER +// 1. Initialize these static variables before all other variables. 
+# pragma warning( disable : 4075 ) +# pragma init_seg( ".CRT$XCB" ) +# endif + +static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() }; +std::atomic init_order(102) RpInitDone( 0 ); +std::atomic init_order(102) RpInitLock( 0 ); +thread_local bool RpThreadInitDone = false; +thread_local bool RpThreadShutdown = false; +moodycamel::ConcurrentQueue init_order(103) s_queue( QueuePrealloc ); +std::atomic init_order(104) s_lockCounter( 0 ); +std::atomic init_order(104) s_gpuCtxCounter( 0 ); + +thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr }; + +struct ThreadNameData; +static std::atomic init_order(104) s_threadNameDataInstance( nullptr ); +std::atomic& s_threadNameData = s_threadNameDataInstance; + +# ifdef TRACY_ON_DEMAND +thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false }; +# endif + +static Profiler init_order(105) s_profiler; + +TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken() { return s_token.ptr; } +TRACY_API Profiler& GetProfiler() { return s_profiler; } +TRACY_API moodycamel::ConcurrentQueue& GetQueue() { return s_queue; } +TRACY_API int64_t GetInitTime() { return s_initTime.val; } +TRACY_API std::atomic& GetLockCounter() { return s_lockCounter; } +TRACY_API std::atomic& GetGpuCtxCounter() { return s_gpuCtxCounter; } +TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; } +TRACY_API uint32_t GetThreadHandle() { return s_threadHandle.val; } + +std::atomic& GetThreadNameData() { return s_threadNameData; } + +# ifdef TRACY_ON_DEMAND +TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } +# endif +#endif + +TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } +TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } + +constexpr static size_t SafeSendBufferSize = 65536; + +Profiler::Profiler() + : m_timeBegin( 0 ) + , m_mainThread( detail::GetThreadHandleImpl() ) + , m_epoch( std::chrono::duration_cast( 
std::chrono::system_clock::now().time_since_epoch() ).count() ) + , m_shutdown( false ) + , m_shutdownManual( false ) + , m_shutdownFinished( false ) + , m_sock( nullptr ) + , m_broadcast( nullptr ) + , m_noExit( false ) + , m_userPort( 0 ) + , m_zoneId( 1 ) + , m_samplingPeriod( 0 ) + , m_stream( LZ4_createStream() ) + , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) ) + , m_bufferOffset( 0 ) + , m_bufferStart( 0 ) + , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) ) + , m_serialQueue( 1024*1024 ) + , m_serialDequeue( 1024*1024 ) +#ifndef TRACY_NO_FRAME_IMAGE + , m_fiQueue( 16 ) + , m_fiDequeue( 16 ) +#endif + , m_symbolQueue( 8*1024 ) + , m_frameCount( 0 ) + , m_isConnected( false ) +#ifdef TRACY_ON_DEMAND + , m_connectionId( 0 ) + , m_deferredQueue( 64*1024 ) +#endif + , m_paramCallback( nullptr ) + , m_sourceCallback( nullptr ) + , m_queryImage( nullptr ) + , m_queryData( nullptr ) + , m_crashHandlerInstalled( false ) + , m_programName( nullptr ) +{ + assert( !s_instance ); + s_instance = this; + +#ifndef TRACY_DELAYED_INIT +# ifdef _MSC_VER + // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. + s_token_detail = moodycamel::ProducerToken( s_queue ); + s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; + s_threadHandle = ThreadHandleWrapper { m_mainThread }; +# else + //#error FilipNur check if works + // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. 
+ s_token_detail = moodycamel::ProducerToken( s_queue ); + s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; + s_threadHandle = ThreadHandleWrapper { m_mainThread }; +# endif +#endif + + CalibrateTimer(); + CalibrateDelay(); + ReportTopology(); + +#ifdef __linux__ + m_kcore = (KCore*)tracy_malloc( sizeof( KCore ) ); + new(m_kcore) KCore(); +#endif + +#ifndef TRACY_NO_EXIT + const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" ); + if( noExitEnv && noExitEnv[0] == '1' ) + { + m_noExit = true; + } +#endif + + const char* userPort = GetEnvVar( "TRACY_PORT" ); + if( userPort ) + { + m_userPort = atoi( userPort ); + } + + m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); + +#if defined _WIN32 && defined __GNUC__ + + m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; + + { // scope for temporary variable originalHandlesCount + int originalHandlesCount = _getmaxstdio(); + + while(_pipe(m_pipe, m_pipeBufSize, _O_BINARY) != 0) + { + if ((errno == EMFILE) || (errno == ENFILE)) + { + // safe upper bound for exceptional situations + if(_getmaxstdio() > (originalHandlesCount + 10)) + { + throw std::runtime_error("Failed to create communication pipe!"); + } + + // as described by Raymond Chen (https://devblogs.microsoft.com/oldnewthing/20070718-00/?p=25963) + // max number of handles in windows is 10000, + // _getmaxstdio() at the start returns 512, so no fear of too much handles + _setmaxstdio(_getmaxstdio() + 1); + } + else + { + m_pipeBufSize /= 2; + } + } + } + +#elif !defined _WIN32 + pipe(m_pipe); +# if defined __APPLE__ || defined BSD + // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default + m_pipeBufSize = 16384; +# else + m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; + while( fcntl( m_pipe[0], F_SETPIPE_SZ, m_pipeBufSize ) < 0 && errno == EPERM ) m_pipeBufSize /= 2; // too big; reduce + m_pipeBufSize = fcntl( m_pipe[0], F_GETPIPE_SZ ); +# endif + fcntl( m_pipe[1], F_SETFL, O_NONBLOCK ); +#endif + +#if 
!defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) + SpawnWorkerThreads(); +#endif +} + +void Profiler::InstallCrashHandler() +{ + +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER + struct sigaction threadFreezer = {}; + threadFreezer.sa_handler = ThreadFreezer; + sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); + + struct sigaction crashHandler = {}; + crashHandler.sa_sigaction = CrashHandler; + crashHandler.sa_flags = SA_SIGINFO; + sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); + sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); + sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); + sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); + sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); + sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); +#endif + +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + // We cannot use Vectored Exception handling because it catches application-wide frame-based SEH blocks. We only + // want to catch unhandled exceptions. 
+ m_prevHandler = (void *)SetUnhandledExceptionFilter( CrashFilter ); +#endif + +#ifndef TRACY_NO_CRASH_HANDLER + m_crashHandlerInstalled = true; +#endif + +} + +void Profiler::RemoveCrashHandler() +{ +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + if( m_crashHandlerInstalled ) + { + auto prev = SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER)m_prevHandler ); + if( prev != CrashFilter ) SetUnhandledExceptionFilter( prev ); // A different exception filter was installed over ours => put it back + } +#endif + +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER + if( m_crashHandlerInstalled ) + { + auto restore = []( int signum, struct sigaction* prev ) { + struct sigaction old; + sigaction( signum, prev, &old ); + if( old.sa_sigaction != CrashHandler ) sigaction( signum, &old, nullptr ); // A different signal handler was installed over ours => put it back + }; + restore( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr ); + restore( SIGILL, &m_prevSignal.ill ); + restore( SIGFPE, &m_prevSignal.fpe ); + restore( SIGSEGV, &m_prevSignal.segv ); + restore( SIGPIPE, &m_prevSignal.pipe ); + restore( SIGBUS, &m_prevSignal.bus ); + restore( SIGABRT, &m_prevSignal.abrt ); + } +#endif + m_crashHandlerInstalled = false; +} + +void Profiler::SpawnWorkerThreads() +{ +#ifdef TRACY_HAS_SYSTEM_TRACING + // use TRACY_NO_SYS_TRACE=1 to force disabling sys tracing (even if available in the underlying system) + // as it can have significant impact on the size of the traces + const char* noSysTrace = GetEnvVar( "TRACY_NO_SYS_TRACE" ); + const bool disableSystrace = (noSysTrace && noSysTrace[0] == '1'); + if( disableSystrace ) + { + TracyDebug("TRACY: Sys Trace was disabled by 'TRACY_NO_SYS_TRACE=1'\n"); + } + else if( SysTraceStart( m_samplingPeriod ) ) + { + s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); + std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); + } 
+#endif + + s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_thread) Thread( LaunchWorker, this ); + +#ifndef TRACY_NO_FRAME_IMAGE + s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_compressThread) Thread( LaunchCompressWorker, this ); +#endif + +#ifdef TRACY_HAS_CALLSTACK + s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_symbolThread) Thread( LaunchSymbolWorker, this ); +#endif + +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + s_profilerThreadId = GetThreadId( s_thread->Handle() ); +# ifdef TRACY_HAS_CALLSTACK + s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); +# endif +#endif + +#ifdef TRACY_HAS_CALLSTACK + InitCallstackCritical(); +#endif + + m_timeBegin.store( GetTime(), std::memory_order_relaxed ); +} + +Profiler::~Profiler() +{ + m_shutdown.store( true, std::memory_order_relaxed ); + + RemoveCrashHandler(); + +#ifdef TRACY_HAS_SYSTEM_TRACING + if( s_sysTraceThread ) + { + SysTraceStop(); + s_sysTraceThread->~Thread(); + tracy_free( s_sysTraceThread ); + } +#endif + +#ifdef TRACY_HAS_CALLSTACK + s_symbolThread->~Thread(); + tracy_free( s_symbolThread ); +#endif + +#ifndef TRACY_NO_FRAME_IMAGE + s_compressThread->~Thread(); + tracy_free( s_compressThread ); +#endif + + s_thread->~Thread(); + tracy_free( s_thread ); + +#ifdef TRACY_HAS_CALLSTACK + EndCallstack(); +#endif + +#ifdef __linux__ + m_kcore->~KCore(); + tracy_free( m_kcore ); +#endif + +#ifndef _WIN32 + close( m_pipe[0] ); + close( m_pipe[1] ); +#elif defined __GNUC__ + // _WIN32 + _close(m_pipe[0]); + _close(m_pipe[1]); +#endif + tracy_free( m_safeSendBuffer ); + + tracy_free( m_lz4Buf ); + tracy_free( m_buffer ); + LZ4_freeStream( (LZ4_stream_t*)m_stream ); + + if( m_sock ) + { + m_sock->~Socket(); + tracy_free( m_sock ); + } + + if( m_broadcast ) + { + m_broadcast->~UdpBroadcast(); + tracy_free( m_broadcast ); + } + + assert( s_instance ); + s_instance = nullptr; +} + +bool Profiler::ShouldExit() +{ + 
return s_instance->m_shutdown.load( std::memory_order_relaxed ); +} + +void Profiler::Worker() +{ +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER + s_profilerTid = syscall( SYS_gettid ); +#endif + + ThreadExitHandler threadExitHandler; + + SetThreadName( "Tracy Profiler" ); + +#ifdef TRACY_DATA_PORT + const bool dataPortSearch = false; + auto dataPort = m_userPort != 0 ? m_userPort : TRACY_DATA_PORT; +#else + const bool dataPortSearch = m_userPort == 0; + auto dataPort = m_userPort != 0 ? m_userPort : 8086; +#endif +#ifdef TRACY_BROADCAST_PORT + const auto broadcastPort = TRACY_BROADCAST_PORT; +#else + const auto broadcastPort = 8086; +#endif + + while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + +#ifdef TRACY_USE_RPMALLOC + rpmalloc_thread_initialize(); +#endif + + m_exectime = 0; + const auto execname = GetProcessExecutablePath(); + if( execname ) + { + struct stat st; + if( stat( execname, &st ) == 0 ) + { + m_exectime = (uint64_t)st.st_mtime; + } + } + + const auto procname = GetProcessName(); + const auto pnsz = std::min( strlen( procname ), WelcomeMessageProgramNameSize - 1 ); + + const auto hostinfo = GetHostInfo(); + const auto hisz = std::min( strlen( hostinfo ), WelcomeMessageHostInfoSize - 1 ); + + const uint64_t pid = GetPid(); + + uint8_t flags = 0; + +#ifdef TRACY_ON_DEMAND + flags |= WelcomeFlag::OnDemand; +#endif +#ifdef __APPLE__ + flags |= WelcomeFlag::IsApple; +#endif +#ifndef TRACY_NO_CODE_TRANSFER + flags |= WelcomeFlag::CodeTransfer; +#endif +#ifdef _WIN32 + flags |= WelcomeFlag::CombineSamples; +# ifndef TRACY_NO_CONTEXT_SWITCH + flags |= WelcomeFlag::IdentifySamples; +# endif +#endif + +#if defined __i386 || defined _M_IX86 + uint8_t cpuArch = CpuArchX86; +#elif defined __x86_64__ || defined _M_X64 + uint8_t cpuArch = CpuArchX64; +#elif defined __aarch64__ + uint8_t cpuArch = CpuArchArm64; +#elif defined __ARM_ARCH + uint8_t cpuArch = CpuArchArm32; +#else + 
uint8_t cpuArch = CpuArchUnknown; +#endif + +#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 + uint32_t regs[4]; + char manufacturer[12]; + CpuId( regs, 0 ); + memcpy( manufacturer, regs+1, 4 ); + memcpy( manufacturer+4, regs+3, 4 ); + memcpy( manufacturer+8, regs+2, 4 ); + + CpuId( regs, 1 ); + uint32_t cpuId = ( regs[0] & 0xFFF ) | ( ( regs[0] & 0xFFF0000 ) >> 4 ); +#else + const char manufacturer[12] = {}; + uint32_t cpuId = 0; +#endif + + WelcomeMessage welcome; + MemWrite( &welcome.timerMul, m_timerMul ); + MemWrite( &welcome.initBegin, GetInitTime() ); + MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) ); + MemWrite( &welcome.delay, m_delay ); + MemWrite( &welcome.resolution, m_resolution ); + MemWrite( &welcome.epoch, m_epoch ); + MemWrite( &welcome.exectime, m_exectime ); + MemWrite( &welcome.pid, pid ); + MemWrite( &welcome.samplingPeriod, m_samplingPeriod ); + MemWrite( &welcome.flags, flags ); + MemWrite( &welcome.cpuArch, cpuArch ); + memcpy( welcome.cpuManufacturer, manufacturer, 12 ); + MemWrite( &welcome.cpuId, cpuId ); + memcpy( welcome.programName, procname, pnsz ); + memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); + memcpy( welcome.hostInfo, hostinfo, hisz ); + memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz ); + + moodycamel::ConsumerToken token( GetQueue() ); + + ListenSocket listen; + bool isListening = false; + if( !dataPortSearch ) + { + isListening = listen.Listen( dataPort, 4 ); + } + else + { + for( uint32_t i=0; i<20; i++ ) + { + if( listen.Listen( dataPort+i, 4 ) ) + { + dataPort += i; + isListening = true; + break; + } + } + } + if( !isListening ) + { + for(;;) + { + if( ShouldExit() ) + { + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } + + ClearQueues( token ); + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + } + } + +#ifndef TRACY_NO_BROADCAST + m_broadcast = 
(UdpBroadcast*)tracy_malloc( sizeof( UdpBroadcast ) ); + new(m_broadcast) UdpBroadcast(); +# ifdef TRACY_ONLY_LOCALHOST + const char* addr = "127.255.255.255"; +# elif defined TRACY_CLIENT_ADDRESS + const char* addr = TRACY_CLIENT_ADDRESS; +# elif defined __QNX__ + // global broadcast address of 255.255.255.255 is not well-supported by QNX, + // use the interface broadcast address instead, e.g. "const char* addr = 192.168.1.255;" +# error Need to specify TRACY_CLIENT_ADDRESS for a QNX target. +# else + const char* addr = "255.255.255.255"; +# endif + if( !m_broadcast->Open( addr, broadcastPort ) ) + { + m_broadcast->~UdpBroadcast(); + tracy_free( m_broadcast ); + m_broadcast = nullptr; + } +#endif + + int broadcastLen = 0; + auto& broadcastMsg = GetBroadcastMessage( procname, pnsz, broadcastLen, dataPort ); + uint64_t lastBroadcast = 0; + + // Connections loop. + // Each iteration of the loop handles whole connection. Multiple iterations will only + // happen in the on-demand mode or when handshake fails. 
+ for(;;) + { + // Wait for incoming connection + for(;;) + { +#ifndef TRACY_NO_EXIT + if( !m_noExit && ShouldExit() ) + { + if( m_broadcast ) + { + broadcastMsg.activeTime = -1; + m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); + } + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } +#endif + m_sock = listen.Accept(); + if( m_sock ) break; +#ifndef TRACY_ON_DEMAND + ProcessSysTime(); +# ifdef TRACY_HAS_SYSPOWER + m_sysPower.Tick(); +# endif +#endif + + if( m_broadcast ) + { + const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + if( t - lastBroadcast > 3000000000 ) // 3s + { + m_programNameLock.lock(); + if( m_programName ) + { + broadcastMsg = GetBroadcastMessage( m_programName, strlen( m_programName ), broadcastLen, dataPort ); + m_programName = nullptr; + } + m_programNameLock.unlock(); + + lastBroadcast = t; + const auto ts = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch() ).count(); + broadcastMsg.activeTime = int32_t( ts - m_epoch ); + assert( broadcastMsg.activeTime >= 0 ); + m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); + } + } + } + + if( m_broadcast ) + { + lastBroadcast = 0; + broadcastMsg.activeTime = -1; + m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); + } + + // Handshake + { + char shibboleth[HandshakeShibbolethSize]; + auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 2000 ); + if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) + { + m_sock->~Socket(); + tracy_free( m_sock ); + m_sock = nullptr; + continue; + } + + uint32_t protocolVersion; + res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 2000 ); + if( !res ) + { + m_sock->~Socket(); + tracy_free( m_sock ); + m_sock = nullptr; + continue; + } + + if( protocolVersion != ProtocolVersion ) + { + HandshakeStatus status = HandshakeProtocolMismatch; + m_sock->Send( &status, sizeof( status ) ); 
+ m_sock->~Socket(); + tracy_free( m_sock ); + m_sock = nullptr; + continue; + } + } + +#ifdef TRACY_ON_DEMAND + const auto currentTime = GetTime(); + ClearQueues( token ); + m_connectionId.fetch_add( 1, std::memory_order_release ); +#endif + m_isConnected.store( true, std::memory_order_release ); + InstallCrashHandler(); + + HandshakeStatus handshake = HandshakeWelcome; + m_sock->Send( &handshake, sizeof( handshake ) ); + + LZ4_resetStream( (LZ4_stream_t*)m_stream ); + m_sock->Send( &welcome, sizeof( welcome ) ); + + m_threadCtx = 0; + m_refTimeSerial = 0; + m_refTimeCtx = 0; + m_refTimeGpu = 0; + +#ifdef TRACY_ON_DEMAND + OnDemandPayloadMessage onDemand; + onDemand.frames = m_frameCount.load( std::memory_order_relaxed ); + onDemand.currentTime = currentTime; + + m_sock->Send( &onDemand, sizeof( onDemand ) ); + + m_deferredLock.lock(); + for( auto& item : m_deferredQueue ) + { + uint64_t ptr; + uint16_t size; + const auto idx = MemRead( &item.hdr.idx ); + switch( (QueueType)idx ) + { + case QueueType::MessageAppInfo: + ptr = MemRead( &item.messageFat.text ); + size = MemRead( &item.messageFat.size ); + SendSingleString( (const char*)ptr, size ); + break; + case QueueType::LockName: + ptr = MemRead( &item.lockNameFat.name ); + size = MemRead( &item.lockNameFat.size ); + SendSingleString( (const char*)ptr, size ); + break; + case QueueType::GpuContextName: + ptr = MemRead( &item.gpuContextNameFat.ptr ); + size = MemRead( &item.gpuContextNameFat.size ); + SendSingleString( (const char*)ptr, size ); + break; + default: + break; + } + AppendData( &item, QueueDataSize[idx] ); + } + m_deferredLock.unlock(); +#endif + + // Main communications loop + int keepAlive = 0; + for(;;) + { + ProcessSysTime(); +#ifdef TRACY_HAS_SYSPOWER + m_sysPower.Tick(); +#endif + const auto status = Dequeue( token ); + const auto serialStatus = DequeueSerial(); + if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) + { + break; + } + else if( status == 
DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) + { + if( ShouldExit() ) break; + if( m_bufferOffset != m_bufferStart ) + { + if( !CommitData() ) break; + } + if( keepAlive == 500 ) + { + QueueItem ka; + ka.hdr.type = QueueType::KeepAlive; + AppendData( &ka, QueueDataSize[ka.hdr.idx] ); + if( !CommitData() ) break; + + keepAlive = 0; + } + else if( !m_sock->HasData() ) + { + keepAlive++; + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + } + } + else + { + keepAlive = 0; + } + + bool connActive = true; + while( m_sock->HasData() ) + { + connActive = HandleServerQuery(); + if( !connActive ) break; + } + if( !connActive ) break; + } + if( ShouldExit() ) break; + + m_isConnected.store( false, std::memory_order_release ); + RemoveCrashHandler(); + +#ifdef TRACY_ON_DEMAND + m_bufferOffset = 0; + m_bufferStart = 0; +#endif + + m_sock->~Socket(); + tracy_free( m_sock ); + m_sock = nullptr; + +#ifndef TRACY_ON_DEMAND + // Client is no longer available here. Accept incoming connections, but reject handshake. + for(;;) + { + if( ShouldExit() ) + { + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } + + ClearQueues( token ); + + m_sock = listen.Accept(); + if( m_sock ) + { + char shibboleth[HandshakeShibbolethSize]; + auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 1000 ); + if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) + { + m_sock->~Socket(); + tracy_free( m_sock ); + m_sock = nullptr; + continue; + } + + uint32_t protocolVersion; + res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 1000 ); + if( !res ) + { + m_sock->~Socket(); + tracy_free( m_sock ); + m_sock = nullptr; + continue; + } + + HandshakeStatus status = HandshakeNotAvailable; + m_sock->Send( &status, sizeof( status ) ); + m_sock->~Socket(); + tracy_free( m_sock ); + } + } +#endif + } + // End of connections loop + + // Wait for symbols thread to terminate. 
Symbol resolution will continue in this thread. +#ifdef TRACY_HAS_CALLSTACK + while( s_symbolThreadGone.load() == false ) { YieldThread(); } +#endif + + // Client is exiting. Send items remaining in queues. + for(;;) + { + const auto status = Dequeue( token ); + const auto serialStatus = DequeueSerial(); + if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) + { + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } + else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) + { + if( m_bufferOffset != m_bufferStart ) CommitData(); + break; + } + + while( m_sock->HasData() ) + { + if( !HandleServerQuery() ) + { + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } + } + +#ifdef TRACY_HAS_CALLSTACK + for(;;) + { + auto si = m_symbolQueue.front(); + if( !si ) break; + HandleSymbolQueueItem( *si ); + m_symbolQueue.pop(); + } +#endif + } + + // Send client termination notice to the server + QueueItem terminate; + MemWrite( &terminate.hdr.type, QueueType::Terminate ); + if( !SendData( (const char*)&terminate, 1 ) ) + { + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } + // Handle remaining server queries + for(;;) + { + while( m_sock->HasData() ) + { + if( !HandleServerQuery() ) + { + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } + } +#ifdef TRACY_HAS_CALLSTACK + for(;;) + { + auto si = m_symbolQueue.front(); + if( !si ) break; + HandleSymbolQueueItem( *si ); + m_symbolQueue.pop(); + } +#endif + const auto status = Dequeue( token ); + const auto serialStatus = DequeueSerial(); + if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) + { + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } + if( m_bufferOffset != m_bufferStart ) + { + if( !CommitData() ) + { + m_shutdownFinished.store( true, std::memory_order_relaxed ); + return; + } 
+ } + } +} + +#ifndef TRACY_NO_FRAME_IMAGE +void Profiler::CompressWorker() +{ + ThreadExitHandler threadExitHandler; + SetThreadName( "Tracy DXT1" ); + while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + +#ifdef TRACY_USE_RPMALLOC + rpmalloc_thread_initialize(); +#endif + + for(;;) + { + const auto shouldExit = ShouldExit(); + + { + bool lockHeld = true; + while( !m_fiLock.try_lock() ) + { + if( m_shutdownManual.load( std::memory_order_relaxed ) ) + { + lockHeld = false; + break; + } + } + if( !m_fiQueue.empty() ) m_fiQueue.swap( m_fiDequeue ); + if( lockHeld ) + { + m_fiLock.unlock(); + } + } + + const auto sz = m_fiDequeue.size(); + if( sz > 0 ) + { + auto fi = m_fiDequeue.data(); + auto end = fi + sz; + while( fi != end ) + { + const auto w = fi->w; + const auto h = fi->h; + const auto csz = size_t( w * h / 2 ); + auto etc1buf = (char*)tracy_malloc( csz ); + CompressImageDxt1( (const char*)fi->image, etc1buf, w, h ); + tracy_free( fi->image ); + + TracyLfqPrepare( QueueType::FrameImage ); + MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf ); + MemWrite( &item->frameImageFat.frame, fi->frame ); + MemWrite( &item->frameImageFat.w, w ); + MemWrite( &item->frameImageFat.h, h ); + uint8_t flip = fi->flip; + MemWrite( &item->frameImageFat.flip, flip ); + TracyLfqCommit; + + fi++; + } + m_fiDequeue.clear(); + } + else + { + std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); + } + + if( shouldExit ) + { + return; + } + } +} +#endif + +static void FreeAssociatedMemory( const QueueItem& item ) +{ + if( item.hdr.idx >= (int)QueueType::Terminate ) return; + + uint64_t ptr; + switch( item.hdr.type ) + { + case QueueType::ZoneText: + case QueueType::ZoneName: + ptr = MemRead( &item.zoneTextFat.text ); + tracy_free( (void*)ptr ); + break; + case QueueType::MessageColor: + case QueueType::MessageColorCallstack: + ptr = MemRead( &item.messageColorFat.text ); + tracy_free( (void*)ptr ); 
+ break; + case QueueType::Message: + case QueueType::MessageCallstack: +#ifndef TRACY_ON_DEMAND + case QueueType::MessageAppInfo: +#endif + ptr = MemRead( &item.messageFat.text ); + tracy_free( (void*)ptr ); + break; + case QueueType::ZoneBeginAllocSrcLoc: + case QueueType::ZoneBeginAllocSrcLocCallstack: + ptr = MemRead( &item.zoneBegin.srcloc ); + tracy_free( (void*)ptr ); + break; + case QueueType::GpuZoneBeginAllocSrcLoc: + case QueueType::GpuZoneBeginAllocSrcLocCallstack: + case QueueType::GpuZoneBeginAllocSrcLocSerial: + case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: + ptr = MemRead( &item.gpuZoneBegin.srcloc ); + tracy_free( (void*)ptr ); + break; + case QueueType::CallstackSerial: + case QueueType::Callstack: + ptr = MemRead( &item.callstackFat.ptr ); + tracy_free( (void*)ptr ); + break; + case QueueType::CallstackAlloc: + ptr = MemRead( &item.callstackAllocFat.nativePtr ); + tracy_free( (void*)ptr ); + ptr = MemRead( &item.callstackAllocFat.ptr ); + tracy_free( (void*)ptr ); + break; + case QueueType::CallstackSample: + case QueueType::CallstackSampleContextSwitch: + ptr = MemRead( &item.callstackSampleFat.ptr ); + tracy_free( (void*)ptr ); + break; + case QueueType::FrameImage: + ptr = MemRead( &item.frameImageFat.image ); + tracy_free( (void*)ptr ); + break; +#ifdef TRACY_HAS_CALLSTACK + case QueueType::CallstackFrameSize: + { + InitRpmalloc(); + auto size = MemRead( &item.callstackFrameSizeFat.size ); + auto data = (const CallstackEntry*)MemRead( &item.callstackFrameSizeFat.data ); + for( uint8_t i=0; i( &item.symbolInformationFat.needFree ); + if( needFree ) + { + ptr = MemRead( &item.symbolInformationFat.fileString ); + tracy_free( (void*)ptr ); + } + break; + } + case QueueType::SymbolCodeMetadata: + ptr = MemRead( &item.symbolCodeMetadata.ptr ); + tracy_free( (void*)ptr ); + break; +#endif +#ifndef TRACY_ON_DEMAND + case QueueType::LockName: + ptr = MemRead( &item.lockNameFat.name ); + tracy_free( (void*)ptr ); + break; + case 
QueueType::GpuContextName: + ptr = MemRead( &item.gpuContextNameFat.ptr ); + tracy_free( (void*)ptr ); + break; +#endif +#ifdef TRACY_ON_DEMAND + case QueueType::MessageAppInfo: + case QueueType::GpuContextName: + // Don't free memory associated with deferred messages. + break; +#endif +#ifdef TRACY_HAS_SYSTEM_TRACING + case QueueType::ExternalNameMetadata: + ptr = MemRead( &item.externalNameMetadata.name ); + tracy_free( (void*)ptr ); + ptr = MemRead( &item.externalNameMetadata.threadName ); + tracy_free_fast( (void*)ptr ); + break; +#endif + case QueueType::SourceCodeMetadata: + ptr = MemRead( &item.sourceCodeMetadata.ptr ); + tracy_free( (void*)ptr ); + break; + default: + break; + } +} + +void Profiler::ClearQueues( moodycamel::ConsumerToken& token ) +{ + for(;;) + { + const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, []( QueueItem* item, size_t sz ) { assert( sz > 0 ); while( sz-- > 0 ) FreeAssociatedMemory( *item++ ); } ); + if( sz == 0 ) break; + } + + ClearSerial(); +} + +void Profiler::ClearSerial() +{ + bool lockHeld = true; + while( !m_serialLock.try_lock() ) + { + if( m_shutdownManual.load( std::memory_order_relaxed ) ) + { + lockHeld = false; + break; + } + } + for( auto& v : m_serialQueue ) FreeAssociatedMemory( v ); + m_serialQueue.clear(); + if( lockHeld ) + { + m_serialLock.unlock(); + } + + for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v ); + m_serialDequeue.clear(); +} + +Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) +{ + bool connectionLost = false; + const auto sz = GetQueue().try_dequeue_bulk_single( token, + [this, &connectionLost] ( const uint32_t& threadId ) + { + if( ThreadCtxCheck( threadId ) == ThreadCtxStatus::ConnectionLost ) connectionLost = true; + }, + [this, &connectionLost] ( QueueItem* item, size_t sz ) + { + if( connectionLost ) return; + InitRpmalloc(); + assert( sz > 0 ); + int64_t refThread = m_refTimeThread; + int64_t refCtx = m_refTimeCtx; + int64_t 
refGpu = m_refTimeGpu; + while( sz-- > 0 ) + { + uint64_t ptr; + uint16_t size; + auto idx = MemRead( &item->hdr.idx ); + if( idx < (int)QueueType::Terminate ) + { + switch( (QueueType)idx ) + { + case QueueType::ZoneText: + case QueueType::ZoneName: + ptr = MemRead( &item->zoneTextFat.text ); + size = MemRead( &item->zoneTextFat.size ); + SendSingleString( (const char*)ptr, size ); + tracy_free_fast( (void*)ptr ); + break; + case QueueType::Message: + case QueueType::MessageCallstack: + ptr = MemRead( &item->messageFat.text ); + size = MemRead( &item->messageFat.size ); + SendSingleString( (const char*)ptr, size ); + tracy_free_fast( (void*)ptr ); + break; + case QueueType::MessageColor: + case QueueType::MessageColorCallstack: + ptr = MemRead( &item->messageColorFat.text ); + size = MemRead( &item->messageColorFat.size ); + SendSingleString( (const char*)ptr, size ); + tracy_free_fast( (void*)ptr ); + break; + case QueueType::MessageAppInfo: + ptr = MemRead( &item->messageFat.text ); + size = MemRead( &item->messageFat.size ); + SendSingleString( (const char*)ptr, size ); +#ifndef TRACY_ON_DEMAND + tracy_free_fast( (void*)ptr ); +#endif + break; + case QueueType::ZoneBeginAllocSrcLoc: + case QueueType::ZoneBeginAllocSrcLocCallstack: + { + int64_t t = MemRead( &item->zoneBegin.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->zoneBegin.time, dt ); + ptr = MemRead( &item->zoneBegin.srcloc ); + SendSourceLocationPayload( ptr ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::Callstack: + ptr = MemRead( &item->callstackFat.ptr ); + SendCallstackPayload( ptr ); + tracy_free_fast( (void*)ptr ); + break; + case QueueType::CallstackAlloc: + ptr = MemRead( &item->callstackAllocFat.nativePtr ); + if( ptr != 0 ) + { + CutCallstack( (void*)ptr, "lua_pcall" ); + SendCallstackPayload( ptr ); + tracy_free_fast( (void*)ptr ); + } + ptr = MemRead( &item->callstackAllocFat.ptr ); + SendCallstackAlloc( ptr ); + tracy_free_fast( (void*)ptr ); 
+ break; + case QueueType::CallstackSample: + case QueueType::CallstackSampleContextSwitch: + { + ptr = MemRead( &item->callstackSampleFat.ptr ); + SendCallstackPayload64( ptr ); + tracy_free_fast( (void*)ptr ); + int64_t t = MemRead( &item->callstackSampleFat.time ); + int64_t dt = t - refCtx; + refCtx = t; + MemWrite( &item->callstackSampleFat.time, dt ); + break; + } + case QueueType::FrameImage: + { + ptr = MemRead( &item->frameImageFat.image ); + const auto w = MemRead( &item->frameImageFat.w ); + const auto h = MemRead( &item->frameImageFat.h ); + const auto csz = size_t( w * h / 2 ); + SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::ZoneBegin: + case QueueType::ZoneBeginCallstack: + { + int64_t t = MemRead( &item->zoneBegin.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->zoneBegin.time, dt ); + break; + } + case QueueType::ZoneEnd: + { + int64_t t = MemRead( &item->zoneEnd.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->zoneEnd.time, dt ); + break; + } + case QueueType::GpuZoneBegin: + case QueueType::GpuZoneBeginCallstack: + { + int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->gpuZoneBegin.cpuTime, dt ); + break; + } + case QueueType::GpuZoneBeginAllocSrcLoc: + case QueueType::GpuZoneBeginAllocSrcLocCallstack: + { + int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->gpuZoneBegin.cpuTime, dt ); + ptr = MemRead( &item->gpuZoneBegin.srcloc ); + SendSourceLocationPayload( ptr ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::GpuZoneEnd: + { + int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->gpuZoneEnd.cpuTime, dt ); + break; + } + case QueueType::GpuContextName: + ptr = MemRead( &item->gpuContextNameFat.ptr 
); + size = MemRead( &item->gpuContextNameFat.size ); + SendSingleString( (const char*)ptr, size ); +#ifndef TRACY_ON_DEMAND + tracy_free_fast( (void*)ptr ); +#endif + break; + case QueueType::PlotDataInt: + case QueueType::PlotDataFloat: + case QueueType::PlotDataDouble: + { + int64_t t = MemRead( &item->plotDataInt.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->plotDataInt.time, dt ); + break; + } + case QueueType::ContextSwitch: + { + int64_t t = MemRead( &item->contextSwitch.time ); + int64_t dt = t - refCtx; + refCtx = t; + MemWrite( &item->contextSwitch.time, dt ); + break; + } + case QueueType::ThreadWakeup: + { + int64_t t = MemRead( &item->threadWakeup.time ); + int64_t dt = t - refCtx; + refCtx = t; + MemWrite( &item->threadWakeup.time, dt ); + break; + } + case QueueType::GpuTime: + { + int64_t t = MemRead( &item->gpuTime.gpuTime ); + int64_t dt = t - refGpu; + refGpu = t; + MemWrite( &item->gpuTime.gpuTime, dt ); + break; + } +#ifdef TRACY_HAS_CALLSTACK + case QueueType::CallstackFrameSize: + { + auto data = (const CallstackEntry*)MemRead( &item->callstackFrameSizeFat.data ); + auto datasz = MemRead( &item->callstackFrameSizeFat.size ); + auto imageName = (const char*)MemRead( &item->callstackFrameSizeFat.imageName ); + SendSingleString( imageName ); + AppendData( item++, QueueDataSize[idx] ); + + for( uint8_t i=0; i( &item->symbolInformationFat.fileString ); + auto needFree = MemRead( &item->symbolInformationFat.needFree ); + SendSingleString( fileString ); + if( needFree ) tracy_free_fast( (void*)fileString ); + break; + } + case QueueType::SymbolCodeMetadata: + { + auto symbol = MemRead( &item->symbolCodeMetadata.symbol ); + auto ptr = (const char*)MemRead( &item->symbolCodeMetadata.ptr ); + auto size = MemRead( &item->symbolCodeMetadata.size ); + SendLongString( symbol, ptr, size, QueueType::SymbolCode ); + tracy_free_fast( (void*)ptr ); + ++item; + continue; + } +#endif +#ifdef TRACY_HAS_SYSTEM_TRACING + case 
QueueType::ExternalNameMetadata: + { + auto thread = MemRead( &item->externalNameMetadata.thread ); + auto name = (const char*)MemRead( &item->externalNameMetadata.name ); + auto threadName = (const char*)MemRead( &item->externalNameMetadata.threadName ); + SendString( thread, threadName, QueueType::ExternalThreadName ); + SendString( thread, name, QueueType::ExternalName ); + tracy_free_fast( (void*)threadName ); + tracy_free_fast( (void*)name ); + ++item; + continue; + } +#endif + case QueueType::SourceCodeMetadata: + { + auto ptr = (const char*)MemRead( &item->sourceCodeMetadata.ptr ); + auto size = MemRead( &item->sourceCodeMetadata.size ); + auto id = MemRead( &item->sourceCodeMetadata.id ); + SendLongString( (uint64_t)id, ptr, size, QueueType::SourceCode ); + tracy_free_fast( (void*)ptr ); + ++item; + continue; + } + default: + assert( false ); + break; + } + } + if( !AppendData( item++, QueueDataSize[idx] ) ) + { + connectionLost = true; + m_refTimeThread = refThread; + m_refTimeCtx = refCtx; + m_refTimeGpu = refGpu; + return; + } + } + m_refTimeThread = refThread; + m_refTimeCtx = refCtx; + m_refTimeGpu = refGpu; + } + ); + if( connectionLost ) return DequeueStatus::ConnectionLost; + return sz > 0 ? 
DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; +} + +Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop ) +{ + const auto sz = GetQueue().try_dequeue_bulk_single( token, [] ( const uint64_t& ) {}, + [this, &timeStop] ( QueueItem* item, size_t sz ) + { + assert( sz > 0 ); + int64_t refCtx = m_refTimeCtx; + while( sz-- > 0 ) + { + FreeAssociatedMemory( *item ); + if( timeStop < 0 ) return; + const auto idx = MemRead( &item->hdr.idx ); + if( idx == (uint8_t)QueueType::ContextSwitch ) + { + const auto csTime = MemRead( &item->contextSwitch.time ); + if( csTime > timeStop ) + { + timeStop = -1; + m_refTimeCtx = refCtx; + return; + } + int64_t dt = csTime - refCtx; + refCtx = csTime; + MemWrite( &item->contextSwitch.time, dt ); + if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) ) + { + timeStop = -2; + m_refTimeCtx = refCtx; + return; + } + } + else if( idx == (uint8_t)QueueType::ThreadWakeup ) + { + const auto csTime = MemRead( &item->threadWakeup.time ); + if( csTime > timeStop ) + { + timeStop = -1; + m_refTimeCtx = refCtx; + return; + } + int64_t dt = csTime - refCtx; + refCtx = csTime; + MemWrite( &item->threadWakeup.time, dt ); + if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) ) + { + timeStop = -2; + m_refTimeCtx = refCtx; + return; + } + } + item++; + } + m_refTimeCtx = refCtx; + } + ); + + if( timeStop == -2 ) return DequeueStatus::ConnectionLost; + return ( timeStop == -1 || sz > 0 ) ? 
DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; +} + +#define ThreadCtxCheckSerial( _name ) \ + uint32_t thread = MemRead( &item->_name.thread ); \ + switch( ThreadCtxCheck( thread ) ) \ + { \ + case ThreadCtxStatus::Same: break; \ + case ThreadCtxStatus::Changed: assert( m_refTimeThread == 0 ); refThread = 0; break; \ + case ThreadCtxStatus::ConnectionLost: return DequeueStatus::ConnectionLost; \ + default: assert( false ); break; \ + } + +Profiler::DequeueStatus Profiler::DequeueSerial() +{ + { + bool lockHeld = true; + while( !m_serialLock.try_lock() ) + { + if( m_shutdownManual.load( std::memory_order_relaxed ) ) + { + lockHeld = false; + break; + } + } + if( !m_serialQueue.empty() ) m_serialQueue.swap( m_serialDequeue ); + if( lockHeld ) + { + m_serialLock.unlock(); + } + } + + const auto sz = m_serialDequeue.size(); + if( sz > 0 ) + { + InitRpmalloc(); + int64_t refSerial = m_refTimeSerial; + int64_t refGpu = m_refTimeGpu; +#ifdef TRACY_FIBERS + int64_t refThread = m_refTimeThread; +#endif + auto item = m_serialDequeue.data(); + auto end = item + sz; + while( item != end ) + { + uint64_t ptr; + auto idx = MemRead( &item->hdr.idx ); + if( idx < (int)QueueType::Terminate ) + { + switch( (QueueType)idx ) + { + case QueueType::CallstackSerial: + ptr = MemRead( &item->callstackFat.ptr ); + SendCallstackPayload( ptr ); + tracy_free_fast( (void*)ptr ); + break; + case QueueType::LockWait: + case QueueType::LockSharedWait: + { + int64_t t = MemRead( &item->lockWait.time ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->lockWait.time, dt ); + break; + } + case QueueType::LockObtain: + case QueueType::LockSharedObtain: + { + int64_t t = MemRead( &item->lockObtain.time ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->lockObtain.time, dt ); + break; + } + case QueueType::LockRelease: + case QueueType::LockSharedRelease: + { + int64_t t = MemRead( &item->lockRelease.time ); + int64_t dt = t - refSerial; + refSerial = t; + 
MemWrite( &item->lockRelease.time, dt ); + break; + } + case QueueType::LockName: + { + ptr = MemRead( &item->lockNameFat.name ); + uint16_t size = MemRead( &item->lockNameFat.size ); + SendSingleString( (const char*)ptr, size ); +#ifndef TRACY_ON_DEMAND + tracy_free_fast( (void*)ptr ); +#endif + break; + } + case QueueType::MemAlloc: + case QueueType::MemAllocNamed: + case QueueType::MemAllocCallstack: + case QueueType::MemAllocCallstackNamed: + { + int64_t t = MemRead( &item->memAlloc.time ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->memAlloc.time, dt ); + break; + } + case QueueType::MemFree: + case QueueType::MemFreeNamed: + case QueueType::MemFreeCallstack: + case QueueType::MemFreeCallstackNamed: + { + int64_t t = MemRead( &item->memFree.time ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->memFree.time, dt ); + break; + } + case QueueType::MemDiscard: + case QueueType::MemDiscardCallstack: + { + int64_t t = MemRead( &item->memDiscard.time ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->memDiscard.time, dt ); + break; + } + case QueueType::GpuZoneBeginSerial: + case QueueType::GpuZoneBeginCallstackSerial: + { + int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->gpuZoneBegin.cpuTime, dt ); + break; + } + case QueueType::GpuZoneBeginAllocSrcLocSerial: + case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: + { + int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->gpuZoneBegin.cpuTime, dt ); + ptr = MemRead( &item->gpuZoneBegin.srcloc ); + SendSourceLocationPayload( ptr ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::GpuZoneEndSerial: + { + int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->gpuZoneEnd.cpuTime, dt ); + break; + } + case QueueType::GpuTime: + { + int64_t t = MemRead( 
&item->gpuTime.gpuTime ); + int64_t dt = t - refGpu; + refGpu = t; + MemWrite( &item->gpuTime.gpuTime, dt ); + break; + } + case QueueType::GpuContextName: + { + ptr = MemRead( &item->gpuContextNameFat.ptr ); + uint16_t size = MemRead( &item->gpuContextNameFat.size ); + SendSingleString( (const char*)ptr, size ); +#ifndef TRACY_ON_DEMAND + tracy_free_fast( (void*)ptr ); +#endif + break; + } +#ifdef TRACY_FIBERS + case QueueType::ZoneBegin: + case QueueType::ZoneBeginCallstack: + { + ThreadCtxCheckSerial( zoneBeginThread ); + int64_t t = MemRead( &item->zoneBegin.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->zoneBegin.time, dt ); + break; + } + case QueueType::ZoneBeginAllocSrcLoc: + case QueueType::ZoneBeginAllocSrcLocCallstack: + { + ThreadCtxCheckSerial( zoneBeginThread ); + int64_t t = MemRead( &item->zoneBegin.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->zoneBegin.time, dt ); + ptr = MemRead( &item->zoneBegin.srcloc ); + SendSourceLocationPayload( ptr ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::ZoneEnd: + { + ThreadCtxCheckSerial( zoneEndThread ); + int64_t t = MemRead( &item->zoneEnd.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->zoneEnd.time, dt ); + break; + } + case QueueType::ZoneText: + case QueueType::ZoneName: + { + ThreadCtxCheckSerial( zoneTextFatThread ); + ptr = MemRead( &item->zoneTextFat.text ); + uint16_t size = MemRead( &item->zoneTextFat.size ); + SendSingleString( (const char*)ptr, size ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::Message: + case QueueType::MessageCallstack: + { + ThreadCtxCheckSerial( messageFatThread ); + ptr = MemRead( &item->messageFat.text ); + uint16_t size = MemRead( &item->messageFat.size ); + SendSingleString( (const char*)ptr, size ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::MessageColor: + case QueueType::MessageColorCallstack: + { + ThreadCtxCheckSerial( 
messageColorFatThread ); + ptr = MemRead( &item->messageColorFat.text ); + uint16_t size = MemRead( &item->messageColorFat.size ); + SendSingleString( (const char*)ptr, size ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::Callstack: + { + ThreadCtxCheckSerial( callstackFatThread ); + ptr = MemRead( &item->callstackFat.ptr ); + SendCallstackPayload( ptr ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::CallstackAlloc: + { + ThreadCtxCheckSerial( callstackAllocFatThread ); + ptr = MemRead( &item->callstackAllocFat.nativePtr ); + if( ptr != 0 ) + { + CutCallstack( (void*)ptr, "lua_pcall" ); + SendCallstackPayload( ptr ); + tracy_free_fast( (void*)ptr ); + } + ptr = MemRead( &item->callstackAllocFat.ptr ); + SendCallstackAlloc( ptr ); + tracy_free_fast( (void*)ptr ); + break; + } + case QueueType::FiberEnter: + { + ThreadCtxCheckSerial( fiberEnter ); + int64_t t = MemRead( &item->fiberEnter.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->fiberEnter.time, dt ); + break; + } + case QueueType::FiberLeave: + { + ThreadCtxCheckSerial( fiberLeave ); + int64_t t = MemRead( &item->fiberLeave.time ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->fiberLeave.time, dt ); + break; + } +#endif + default: + assert( false ); + break; + } + } +#ifdef TRACY_FIBERS + else + { + switch( (QueueType)idx ) + { + case QueueType::ZoneColor: + { + ThreadCtxCheckSerial( zoneColorThread ); + break; + } + case QueueType::ZoneValue: + { + ThreadCtxCheckSerial( zoneValueThread ); + break; + } + case QueueType::ZoneValidation: + { + ThreadCtxCheckSerial( zoneValidationThread ); + break; + } + case QueueType::MessageLiteral: + case QueueType::MessageLiteralCallstack: + { + ThreadCtxCheckSerial( messageLiteralThread ); + break; + } + case QueueType::MessageLiteralColor: + case QueueType::MessageLiteralColorCallstack: + { + ThreadCtxCheckSerial( messageColorLiteralThread ); + break; + } + case QueueType::CrashReport: + { + 
ThreadCtxCheckSerial( crashReportThread ); + break; + } + default: + break; + } + } +#endif + if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost; + item++; + } + m_refTimeSerial = refSerial; + m_refTimeGpu = refGpu; +#ifdef TRACY_FIBERS + m_refTimeThread = refThread; +#endif + m_serialDequeue.clear(); + } + else + { + return DequeueStatus::QueueEmpty; + } + return DequeueStatus::DataDequeued; +} + +Profiler::ThreadCtxStatus Profiler::ThreadCtxCheck( uint32_t threadId ) +{ + if( m_threadCtx == threadId ) return ThreadCtxStatus::Same; + QueueItem item; + MemWrite( &item.hdr.type, QueueType::ThreadContext ); + MemWrite( &item.threadCtx.thread, threadId ); + if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return ThreadCtxStatus::ConnectionLost; + m_threadCtx = threadId; + m_refTimeThread = 0; + return ThreadCtxStatus::Changed; +} + +bool Profiler::CommitData() +{ + bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart ); + if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0; + m_bufferStart = m_bufferOffset; + return ret; +} + +char* Profiler::SafeCopyProlog( const char* data, size_t size ) +{ + bool success = true; + char* buf = m_safeSendBuffer; +#ifndef NDEBUG + assert( !m_inUse.exchange(true) ); +#endif + + if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); + +#if defined _WIN32 && defined _MSC_VER + __try + { + memcpy( buf, data, size ); + } + __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) + { + success = false; + } + +#elif defined _WIN32 && defined __GNUC__ + // Send through the pipe to ensure safe reads on compilers with no __try/__except + for( size_t offset = 0; offset != size; /*in loop*/ ) + { + size_t sendsize = size - offset; + int result1, result2; + + // ENOSPC indicates that there is no more space to execute write operation + // other possible values: + // EBADF - invalid file descriptor or not opened for writing + // EINVAL - null buffer or odd 
number of bytes in unicode mode + while( ( result1 = _write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno != ENOSPC ) { /* retry */ } + if( result1 < 0 ) + { + success = false; + break; + } + + // EBADF - errno set to this value if pipe is not opened for reading or locked + // other possible values: + // EINVAL - result1 > INT_MAX + while( ( result2 = _read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno != EBADF ) { /* retry */ } + if( result2 != result1 ) + { + success = false; + break; + } + offset += result1; + } +#else + // Send through the pipe to ensure safe reads + for( size_t offset = 0; offset != size; /*in loop*/ ) + { + size_t sendsize = size - offset; + ssize_t result1, result2; + while( ( result1 = write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno == EINTR ) { /* retry */ } + if( result1 < 0 ) + { + success = false; + break; + } + while( ( result2 = read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno == EINTR ) { /* retry */ } + if( result2 != result1 ) + { + success = false; + break; + } + offset += result1; + } +#endif + + if( success ) return buf; + + SafeCopyEpilog( buf ); + return nullptr; +} + +void Profiler::SafeCopyEpilog( char* buf ) +{ + if( buf != m_safeSendBuffer ) tracy_free( buf ); + +#ifndef NDEBUG + m_inUse.store( false ); +#endif +} + +bool Profiler::SendData( const char* data, size_t len ) +{ + const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 ); + memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) ); + return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1; +} + +void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type ) +{ + assert( type == QueueType::StringData || + type == QueueType::ThreadName || + type == QueueType::PlotName || + type == QueueType::FrameName || + type == QueueType::ExternalName || + type == QueueType::ExternalThreadName || + type == QueueType::FiberName ); + + QueueItem item; + 
MemWrite( &item.hdr.type, type ); + MemWrite( &item.stringTransfer.ptr, str ); + + assert( len <= std::numeric_limits::max() ); + auto l16 = uint16_t( len ); + + NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 ); + + AppendDataUnsafe( &item, QueueDataSize[(int)type] ); + AppendDataUnsafe( &l16, sizeof( l16 ) ); + AppendDataUnsafe( ptr, l16 ); +} + +void Profiler::SendSingleString( const char* ptr, size_t len ) +{ + QueueItem item; + MemWrite( &item.hdr.type, QueueType::SingleStringData ); + + assert( len <= std::numeric_limits::max() ); + auto l16 = uint16_t( len ); + + NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 ); + + AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] ); + AppendDataUnsafe( &l16, sizeof( l16 ) ); + AppendDataUnsafe( ptr, l16 ); +} + +void Profiler::SendSecondString( const char* ptr, size_t len ) +{ + QueueItem item; + MemWrite( &item.hdr.type, QueueType::SecondStringData ); + + assert( len <= std::numeric_limits::max() ); + auto l16 = uint16_t( len ); + + NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 ); + + AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] ); + AppendDataUnsafe( &l16, sizeof( l16 ) ); + AppendDataUnsafe( ptr, l16 ); +} + +void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type ) +{ + assert( type == QueueType::FrameImageData || + type == QueueType::SymbolCode || + type == QueueType::SourceCode ); + + QueueItem item; + MemWrite( &item.hdr.type, type ); + MemWrite( &item.stringTransfer.ptr, str ); + + assert( len <= std::numeric_limits::max() ); + assert( QueueDataSize[(int)type] + sizeof( uint32_t ) + len <= TargetFrameSize ); + auto l32 = uint32_t( len ); + + NeedDataSize( QueueDataSize[(int)type] + sizeof( l32 ) + l32 ); + + AppendDataUnsafe( &item, QueueDataSize[(int)type] ); + AppendDataUnsafe( &l32, sizeof( l32 ) ); + AppendDataUnsafe( ptr, l32 ); +} + +void 
Profiler::SendSourceLocation( uint64_t ptr ) +{ + auto srcloc = (const SourceLocationData*)ptr; + QueueItem item; + MemWrite( &item.hdr.type, QueueType::SourceLocation ); + MemWrite( &item.srcloc.name, (uint64_t)srcloc->name ); + MemWrite( &item.srcloc.file, (uint64_t)srcloc->file ); + MemWrite( &item.srcloc.function, (uint64_t)srcloc->function ); + MemWrite( &item.srcloc.line, srcloc->line ); + MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color ) & 0xFF ) ); + MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) ); + MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) ); + AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] ); +} + +void Profiler::SendSourceLocationPayload( uint64_t _ptr ) +{ + auto ptr = (const char*)_ptr; + + QueueItem item; + MemWrite( &item.hdr.type, QueueType::SourceLocationPayload ); + MemWrite( &item.stringTransfer.ptr, _ptr ); + + uint16_t len; + memcpy( &len, ptr, sizeof( len ) ); + assert( len > 2 ); + len -= 2; + ptr += 2; + + NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len ); + + AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] ); + AppendDataUnsafe( &len, sizeof( len ) ); + AppendDataUnsafe( ptr, len ); +} + +void Profiler::SendCallstackPayload( uint64_t _ptr ) +{ + auto ptr = (uintptr_t*)_ptr; + + QueueItem item; + MemWrite( &item.hdr.type, QueueType::CallstackPayload ); + MemWrite( &item.stringTransfer.ptr, _ptr ); + + const auto sz = *ptr++; + const auto len = sz * sizeof( uint64_t ); + const auto l16 = uint16_t( len ); + + NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 ); + + AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] ); + AppendDataUnsafe( &l16, sizeof( l16 ) ); + + if( compile_time_condition::value ) + { + AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz ); + } + else + { + for( uintptr_t i=0; i> 63 != 0 ) + { + SendSingleString( "" ); + QueueItem item; 
+ MemWrite( &item.hdr.type, QueueType::SymbolInformation ); + MemWrite( &item.symbolInformation.line, 0 ); + MemWrite( &item.symbolInformation.symAddr, symbol ); + AppendData( &item, QueueDataSize[(int)QueueType::SymbolInformation] ); + } + else + { + m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SymbolQuery, symbol } ); + } +#else + AckServerQuery(); +#endif +} + +void Profiler::QueueExternalName( uint64_t ptr ) +{ +#ifdef TRACY_HAS_SYSTEM_TRACING + m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::ExternalName, ptr } ); +#endif +} + +void Profiler::QueueKernelCode( uint64_t symbol, uint32_t size ) +{ + assert( symbol >> 63 != 0 ); +#ifdef TRACY_HAS_CALLSTACK + m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::KernelCode, symbol, size } ); +#else + AckSymbolCodeNotAvailable(); +#endif +} + +void Profiler::QueueSourceCodeQuery( uint32_t id ) +{ + assert( m_exectime != 0 ); + assert( m_queryData ); + m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SourceCode, uint64_t( m_queryData ), uint64_t( m_queryImage ), id } ); + m_queryData = nullptr; + m_queryImage = nullptr; +} + +#ifdef TRACY_HAS_CALLSTACK +void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) +{ + switch( si.type ) + { + case SymbolQueueItemType::CallstackFrame: + { + const auto frameData = DecodeCallstackPtr( si.ptr ); + auto data = tracy_malloc_fast( sizeof( CallstackEntry ) * frameData.size ); + memcpy( data, frameData.data, sizeof( CallstackEntry ) * frameData.size ); + TracyLfqPrepare( QueueType::CallstackFrameSize ); + MemWrite( &item->callstackFrameSizeFat.ptr, si.ptr ); + MemWrite( &item->callstackFrameSizeFat.size, frameData.size ); + MemWrite( &item->callstackFrameSizeFat.data, (uint64_t)data ); + MemWrite( &item->callstackFrameSizeFat.imageName, (uint64_t)frameData.imageName ); + TracyLfqCommit; + break; + } + case SymbolQueueItemType::SymbolQuery: + { +#ifdef __ANDROID__ + // On Android it's common for code to be in mappings 
that are only executable + // but not readable. + if( !EnsureReadable( si.ptr ) ) + { + TracyLfqPrepare( QueueType::AckServerQueryNoop ); + TracyLfqCommit; + break; + } +#endif + const auto sym = DecodeSymbolAddress( si.ptr ); + TracyLfqPrepare( QueueType::SymbolInformation ); + MemWrite( &item->symbolInformationFat.line, sym.line ); + MemWrite( &item->symbolInformationFat.symAddr, si.ptr ); + MemWrite( &item->symbolInformationFat.fileString, (uint64_t)sym.file ); + MemWrite( &item->symbolInformationFat.needFree, (uint8_t)sym.needFree ); + TracyLfqCommit; + break; + } +#ifdef TRACY_HAS_SYSTEM_TRACING + case SymbolQueueItemType::ExternalName: + { + const char* threadName; + const char* name; + SysTraceGetExternalName( si.ptr, threadName, name ); + TracyLfqPrepare( QueueType::ExternalNameMetadata ); + MemWrite( &item->externalNameMetadata.thread, si.ptr ); + MemWrite( &item->externalNameMetadata.name, (uint64_t)name ); + MemWrite( &item->externalNameMetadata.threadName, (uint64_t)threadName ); + TracyLfqCommit; + break; + } +#endif + case SymbolQueueItemType::KernelCode: + { +#ifdef _WIN32 + auto mod = GetKernelModulePath( si.ptr ); + if( mod ) + { + auto fn = DecodeCallstackPtrFast( si.ptr ); + if( *fn ) + { + auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); + if( hnd ) + { + auto ptr = (const void*)GetProcAddress( hnd, fn ); + if( ptr ) + { + auto buf = (char*)tracy_malloc( si.extra ); + memcpy( buf, ptr, si.extra ); + FreeLibrary( hnd ); + TracyLfqPrepare( QueueType::SymbolCodeMetadata ); + MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); + MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); + MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); + TracyLfqCommit; + break; + } + FreeLibrary( hnd ); + } + } + } +#elif defined __linux__ + void* data = m_kcore->Retrieve( si.ptr, si.extra ); + if( data ) + { + TracyLfqPrepare( QueueType::SymbolCodeMetadata ); + MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); + 
MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)data ); + MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); + TracyLfqCommit; + break; + } +#endif + TracyLfqPrepare( QueueType::AckSymbolCodeNotAvailable ); + TracyLfqCommit; + break; + } + case SymbolQueueItemType::SourceCode: + HandleSourceCodeQuery( (char*)si.ptr, (char*)si.extra, si.id ); + break; + default: + assert( false ); + break; + } +} + +void Profiler::SymbolWorker() +{ +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER + s_symbolTid = syscall( SYS_gettid ); +#endif + + ThreadExitHandler threadExitHandler; + SetThreadName( "Tracy Symbol Worker" ); +#ifdef TRACY_USE_RPMALLOC + InitRpmalloc(); +#endif + InitCallstack(); + while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + + for(;;) + { + const auto shouldExit = ShouldExit(); +#ifdef TRACY_ON_DEMAND + if( !IsConnected() ) + { + if( shouldExit ) + { + s_symbolThreadGone.store( true, std::memory_order_release ); + return; + } + while( m_symbolQueue.front() ) m_symbolQueue.pop(); + std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); + continue; + } +#endif + auto si = m_symbolQueue.front(); + if( si ) + { + HandleSymbolQueueItem( *si ); + m_symbolQueue.pop(); + } + else + { + if( shouldExit ) + { + s_symbolThreadGone.store( true, std::memory_order_release ); + return; + } + std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); + } + } +} +#endif + +bool Profiler::HandleServerQuery() +{ + ServerQueryPacket payload; + if( !m_sock->Read( &payload, sizeof( payload ), 10 ) ) return false; + + uint8_t type; + uint64_t ptr; + memcpy( &type, &payload.type, sizeof( payload.type ) ); + memcpy( &ptr, &payload.ptr, sizeof( payload.ptr ) ); + + switch( type ) + { + case ServerQueryString: + SendString( ptr, (const char*)ptr, QueueType::StringData ); + break; + case ServerQueryThreadString: + if( ptr == m_mainThread ) + { + SendString( ptr, "Main thread", 
11, QueueType::ThreadName ); + } + else + { + auto t = GetThreadNameData( (uint32_t)ptr ); + if( t ) + { + SendString( ptr, t->name, QueueType::ThreadName ); + if( t->groupHint != 0 ) + { + TracyLfqPrepare( QueueType::ThreadGroupHint ); + MemWrite( &item->threadGroupHint.thread, (uint32_t)ptr ); + MemWrite( &item->threadGroupHint.groupHint, t->groupHint ); + TracyLfqCommit; + } + } + else + { + SendString( ptr, GetThreadName( (uint32_t)ptr ), QueueType::ThreadName ); + } + } + break; + case ServerQuerySourceLocation: + SendSourceLocation( ptr ); + break; + case ServerQueryPlotName: + SendString( ptr, (const char*)ptr, QueueType::PlotName ); + break; + case ServerQueryTerminate: + return false; + case ServerQueryCallstackFrame: + QueueCallstackFrame( ptr ); + break; + case ServerQueryFrameName: + SendString( ptr, (const char*)ptr, QueueType::FrameName ); + break; + case ServerQueryDisconnect: + HandleDisconnect(); + return false; +#ifdef TRACY_HAS_SYSTEM_TRACING + case ServerQueryExternalName: + QueueExternalName( ptr ); + break; +#endif + case ServerQueryParameter: + HandleParameter( ptr ); + break; + case ServerQuerySymbol: + QueueSymbolQuery( ptr ); + break; +#ifndef TRACY_NO_CODE_TRANSFER + case ServerQuerySymbolCode: + HandleSymbolCodeQuery( ptr, payload.extra ); + break; +#endif + case ServerQuerySourceCode: + QueueSourceCodeQuery( uint32_t( ptr ) ); + break; + case ServerQueryDataTransfer: + if( m_queryData ) + { + assert( !m_queryImage ); + m_queryImage = m_queryData; + } + m_queryDataPtr = m_queryData = (char*)tracy_malloc( ptr + 11 ); + AckServerQuery(); + break; + case ServerQueryDataTransferPart: + memcpy( m_queryDataPtr, &ptr, 8 ); + memcpy( m_queryDataPtr+8, &payload.extra, 4 ); + m_queryDataPtr += 12; + AckServerQuery(); + break; +#ifdef TRACY_FIBERS + case ServerQueryFiberName: + SendString( ptr, (const char*)ptr, QueueType::FiberName ); + break; +#endif + default: + assert( false ); + break; + } + + return true; +} + +void 
Profiler::HandleDisconnect() +{ + moodycamel::ConsumerToken token( GetQueue() ); + +#ifdef TRACY_HAS_SYSTEM_TRACING + if( s_sysTraceThread ) + { + auto timestamp = GetTime(); + for(;;) + { + const auto status = DequeueContextSwitches( token, timestamp ); + if( status == DequeueStatus::ConnectionLost ) + { + return; + } + else if( status == DequeueStatus::QueueEmpty ) + { + if( m_bufferOffset != m_bufferStart ) + { + if( !CommitData() ) return; + } + } + if( timestamp < 0 ) + { + if( m_bufferOffset != m_bufferStart ) + { + if( !CommitData() ) return; + } + break; + } + ClearSerial(); + if( m_sock->HasData() ) + { + while( m_sock->HasData() ) + { + if( !HandleServerQuery() ) return; + } + if( m_bufferOffset != m_bufferStart ) + { + if( !CommitData() ) return; + } + } + else + { + if( m_bufferOffset != m_bufferStart ) + { + if( !CommitData() ) return; + } + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + } + } + } +#endif + + QueueItem terminate; + MemWrite( &terminate.hdr.type, QueueType::Terminate ); + if( !SendData( (const char*)&terminate, 1 ) ) return; + for(;;) + { + ClearQueues( token ); + if( m_sock->HasData() ) + { + while( m_sock->HasData() ) + { + if( !HandleServerQuery() ) return; + } + if( m_bufferOffset != m_bufferStart ) + { + if( !CommitData() ) return; + } + } + else + { + if( m_bufferOffset != m_bufferStart ) + { + if( !CommitData() ) return; + } + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + } + } +} + +void Profiler::CalibrateTimer() +{ + m_timerMul = 1.; + +#ifdef TRACY_HW_TIMER + +# if !defined TRACY_TIMER_QPC && defined TRACY_TIMER_FALLBACK + const bool needCalibration = HardwareSupportsInvariantTSC(); +# else + const bool needCalibration = true; +# endif + if( needCalibration ) + { + std::atomic_signal_fence( std::memory_order_acq_rel ); + const auto t0 = std::chrono::high_resolution_clock::now(); + const auto r0 = GetTime(); + std::atomic_signal_fence( std::memory_order_acq_rel ); + 
std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) ); + std::atomic_signal_fence( std::memory_order_acq_rel ); + const auto t1 = std::chrono::high_resolution_clock::now(); + const auto r1 = GetTime(); + std::atomic_signal_fence( std::memory_order_acq_rel ); + + const auto dt = std::chrono::duration_cast( t1 - t0 ).count(); + const auto dr = r1 - r0; + + m_timerMul = double( dt ) / double( dr ); + } +#endif +} + +void Profiler::CalibrateDelay() +{ + constexpr int Iterations = 50000; + + auto mindiff = std::numeric_limits::max(); + for( int i=0; i 0 && dti < mindiff ) mindiff = dti; + } + m_resolution = mindiff; + +#ifdef TRACY_DELAYED_INIT + m_delay = m_resolution; +#else + constexpr int Events = Iterations * 2; // start + end + static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" ); + + static const tracy::SourceLocationData __tracy_source_location { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; + const auto t0 = GetTime(); + for( int i=0; izoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location ); + TracyLfqCommit; + } + { + TracyLfqPrepare( QueueType::ZoneEnd ); + MemWrite( &item->zoneEnd.time, GetTime() ); + TracyLfqCommit; + } + } + const auto t1 = GetTime(); + const auto dt = t1 - t0; + m_delay = dt / Events; + + moodycamel::ConsumerToken token( GetQueue() ); + int left = Events; + while( left != 0 ) + { + const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, [](QueueItem* item, size_t sz){} ); + assert( sz > 0 ); + left -= (int)sz; + } + assert( GetQueue().size_approx() == 0 ); +#endif +} + +void Profiler::ReportTopology() +{ +#ifndef TRACY_DELAYED_INIT + struct CpuData + { + uint32_t package; + uint32_t die; + uint32_t core; + uint32_t thread; + }; + +#if defined _WIN32 +# ifdef TRACY_UWP + t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = &::GetLogicalProcessorInformationEx; +# else + 
t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = (t_GetLogicalProcessorInformationEx)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetLogicalProcessorInformationEx" ); +# endif + if( !_GetLogicalProcessorInformationEx ) return; + + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* packageInfo = nullptr; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* dieInfo = nullptr; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* coreInfo = nullptr; + + DWORD psz = 0; + _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); + assert( res ); + } + else + { + psz = 0; + } + + DWORD dsz = 0; + _GetLogicalProcessorInformationEx( RelationProcessorDie, nullptr, &dsz ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + dieInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( dsz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorDie, dieInfo, &dsz ); + assert( res ); + } + else + { + dsz = 0; + } + + DWORD csz = 0; + _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); + assert( res ); + } + else + { + csz = 0; + } + + SYSTEM_INFO sysinfo; + GetSystemInfo( &sysinfo ); + const uint32_t numcpus = sysinfo.dwNumberOfProcessors; + + auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); + memset( cpuData, 0, sizeof( CpuData ) * numcpus ); + for( uint32_t i=0; iRelationship == RelationProcessorPackage ); + // FIXME account for GroupCount + auto mask = ptr->Processor.GroupMask[0].Mask; + int core = 0; + while( mask != 0 ) + { + if( mask & 1 ) 
cpuData[core].package = idx; + core++; + mask >>= 1; + } + ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); + idx++; + } + + idx = 0; + ptr = dieInfo; + while( (char*)ptr < ((char*)dieInfo) + dsz ) + { + assert( ptr->Relationship == RelationProcessorDie ); + // FIXME account for GroupCount + auto mask = ptr->Processor.GroupMask[0].Mask; + int core = 0; + while( mask != 0 ) + { + if( mask & 1 ) cpuData[core].die = idx; + core++; + mask >>= 1; + } + ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); + idx++; + } + + idx = 0; + ptr = coreInfo; + while( (char*)ptr < ((char*)coreInfo) + csz ) + { + assert( ptr->Relationship == RelationProcessorCore ); + // FIXME account for GroupCount + auto mask = ptr->Processor.GroupMask[0].Mask; + int core = 0; + while( mask != 0 ) + { + if( mask & 1 ) cpuData[core].core = idx; + core++; + mask >>= 1; + } + ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); + idx++; + } + + for( uint32_t i=0; icpuTopology.package, data.package ); + MemWrite( &item->cpuTopology.die, data.die ); + MemWrite( &item->cpuTopology.core, data.core ); + MemWrite( &item->cpuTopology.thread, data.thread ); + +#ifdef TRACY_ON_DEMAND + DeferItem( *item ); +#endif + + TracyLfqCommit; + } + + tracy_free( cpuData ); + tracy_free( coreInfo ); + tracy_free( packageInfo ); +#elif defined __linux__ + const int numcpus = std::thread::hardware_concurrency(); + auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); + memset( cpuData, 0, sizeof( CpuData ) * numcpus ); + + const char* basePath = "/sys/devices/system/cpu/cpu"; + for( int i=0; icpuTopology.package, data.package ); + MemWrite( &item->cpuTopology.die, data.die ); + MemWrite( &item->cpuTopology.core, data.core ); + MemWrite( &item->cpuTopology.thread, data.thread ); + +#ifdef TRACY_ON_DEMAND + DeferItem( *item ); +#endif + + TracyLfqCommit; + } + + tracy_free( cpuData ); +#endif +#endif +} + +void Profiler::SendCallstack( 
int32_t depth, const char* skipBefore ) +{ +#ifdef TRACY_HAS_CALLSTACK + auto ptr = Callstack( depth ); + CutCallstack( ptr, skipBefore ); + + TracyQueuePrepare( QueueType::Callstack ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); + TracyQueueCommit( callstackFatThread ); +#endif +} + +void Profiler::CutCallstack( void* callstack, const char* skipBefore ) +{ +#ifdef TRACY_HAS_CALLSTACK + auto data = (uintptr_t*)callstack; + const auto sz = *data++; + uintptr_t i; + for( i=0; i 100000000 ) // 100 ms + { + auto sysTime = m_sysTime.Get(); + if( sysTime >= 0 ) + { + m_sysTimeLast = t; + + TracyLfqPrepare( QueueType::SysTimeReport ); + MemWrite( &item->sysTime.time, GetTime() ); + MemWrite( &item->sysTime.sysTime, sysTime ); + TracyLfqCommit; + } + } +} +#endif + +void Profiler::HandleParameter( uint64_t payload ) +{ + assert( m_paramCallback ); + const auto idx = uint32_t( payload >> 32 ); + const auto val = int32_t( payload & 0xFFFFFFFF ); + m_paramCallback( m_paramCallbackData, idx, val ); + AckServerQuery(); +} + +void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) +{ + if( symbol >> 63 != 0 ) + { + QueueKernelCode( symbol, size ); + } + else + { + auto&& lambda = [ this, symbol ]( const char* buf, size_t size ) { + SendLongString( symbol, buf, size, QueueType::SymbolCode ); + }; + + // 'symbol' may have come from a module that has since unloaded, perform a safe copy before sending + if( !WithSafeCopy( (const char*)symbol, size, lambda ) ) AckSymbolCodeNotAvailable(); + } +} + +void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) +{ + bool ok = false; + FILE* f = fopen( data, "rb" ); + if( f ) + { + struct stat st; + if( fstat( fileno( f ), &st ) == 0 && (uint64_t)st.st_mtime < m_exectime && st.st_size < ( TargetFrameSize - 16 ) ) + { + auto ptr = (char*)tracy_malloc_fast( st.st_size ); + auto rd = fread( ptr, 1, st.st_size, f ); + if( rd == (size_t)st.st_size ) + { + TracyLfqPrepare( QueueType::SourceCodeMetadata 
); + MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); + MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); + MemWrite( &item->sourceCodeMetadata.id, id ); + TracyLfqCommit; + ok = true; + } + else + { + tracy_free_fast( ptr ); + } + } + fclose( f ); + } + +#ifdef TRACY_DEBUGINFOD + else if( image && data[0] == '/' ) + { + size_t size; + auto buildid = GetBuildIdForImage( image, size ); + if( buildid ) + { + auto d = debuginfod_find_source( GetDebuginfodClient(), buildid, size, data, nullptr ); + TracyDebug( "DebugInfo source query: %s, fn: %s, image: %s\n", d >= 0 ? " ok " : "fail", data, image ); + if( d >= 0 ) + { + struct stat st; + fstat( d, &st ); + if( st.st_size < ( TargetFrameSize - 16 ) ) + { + lseek( d, 0, SEEK_SET ); + auto ptr = (char*)tracy_malloc_fast( st.st_size ); + auto rd = read( d, ptr, st.st_size ); + if( rd == (size_t)st.st_size ) + { + TracyLfqPrepare( QueueType::SourceCodeMetadata ); + MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); + MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); + MemWrite( &item->sourceCodeMetadata.id, id ); + TracyLfqCommit; + ok = true; + } + else + { + tracy_free_fast( ptr ); + } + } + close( d ); + } + } + } + else + { + TracyDebug( "DebugInfo invalid query fn: %s, image: %s\n", data, image ); + } +#endif + + if( !ok && m_sourceCallback ) + { + size_t sz; + char* ptr = m_sourceCallback( m_sourceCallbackData, data, sz ); + if( ptr ) + { + if( sz < ( TargetFrameSize - 16 ) ) + { + TracyLfqPrepare( QueueType::SourceCodeMetadata ); + MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); + MemWrite( &item->sourceCodeMetadata.size, (uint32_t)sz ); + MemWrite( &item->sourceCodeMetadata.id, id ); + TracyLfqCommit; + ok = true; + } + else + { + tracy_free_fast( ptr ); + } + } + } + + if( !ok ) + { + TracyLfqPrepare( QueueType::AckSourceCodeNotAvailable ); + MemWrite( &item->sourceCodeNotAvailable, id ); + TracyLfqCommit; + } + + tracy_free_fast( data ); + tracy_free_fast( image ); +} + 
+#if defined _WIN32 && defined TRACY_TIMER_QPC +int64_t Profiler::GetTimeQpc() +{ + LARGE_INTEGER t; + QueryPerformanceCounter( &t ); + return t.QuadPart; +} +#endif + +} + +#ifdef __cplusplus +extern "C" { +#endif + +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ) +{ + ___tracy_c_zone_context ctx; +#ifdef TRACY_ON_DEMAND + ctx.active = active && tracy::GetProfiler().IsConnected(); +#else + ctx.active = active; +#endif + if( !ctx.active ) return ctx; + const auto id = tracy::GetProfiler().GetNextZoneId(); + ctx.id = id; + +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + { + TracyQueuePrepareC( tracy::QueueType::ZoneBegin ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); + TracyQueueCommitC( zoneBeginThread ); + } + return ctx; +} + +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ) +{ + ___tracy_c_zone_context ctx; +#ifdef TRACY_ON_DEMAND + ctx.active = active && tracy::GetProfiler().IsConnected(); +#else + ctx.active = active; +#endif + if( !ctx.active ) return ctx; + const auto id = tracy::GetProfiler().GetNextZoneId(); + ctx.id = id; + +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + auto zoneQueue = tracy::QueueType::ZoneBegin; + if( depth > 0 && tracy::has_callstack() ) + { + tracy::GetProfiler().SendCallstack( depth ); + zoneQueue = tracy::QueueType::ZoneBeginCallstack; + } + TracyQueuePrepareC( zoneQueue ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( 
&item->zoneBegin.srcloc, (uint64_t)srcloc ); + TracyQueueCommitC( zoneBeginThread ); + + return ctx; +} + +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ) +{ + ___tracy_c_zone_context ctx; +#ifdef TRACY_ON_DEMAND + ctx.active = active && tracy::GetProfiler().IsConnected(); +#else + ctx.active = active; +#endif + if( !ctx.active ) + { + tracy::tracy_free( (void*)srcloc ); + return ctx; + } + const auto id = tracy::GetProfiler().GetNextZoneId(); + ctx.id = id; + +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + { + TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommitC( zoneBeginThread ); + } + return ctx; +} + +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ) +{ + ___tracy_c_zone_context ctx; +#ifdef TRACY_ON_DEMAND + ctx.active = active && tracy::GetProfiler().IsConnected(); +#else + ctx.active = active; +#endif + if( !ctx.active ) + { + tracy::tracy_free( (void*)srcloc ); + return ctx; + } + const auto id = tracy::GetProfiler().GetNextZoneId(); + ctx.id = id; + +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + auto zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLoc; + if( depth > 0 && tracy::has_callstack() ) + { + tracy::GetProfiler().SendCallstack( depth ); + zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLocCallstack; + } + TracyQueuePrepareC( zoneQueue ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommitC( zoneBeginThread 
); + + return ctx; +} + +TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ) +{ + if( !ctx.active ) return; +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, ctx.id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + { + TracyQueuePrepareC( tracy::QueueType::ZoneEnd ); + tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() ); + TracyQueueCommitC( zoneEndThread ); + } +} + +TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ) +{ + assert( size < std::numeric_limits::max() ); + if( !ctx.active ) return; + auto ptr = (char*)tracy::tracy_malloc( size ); + memcpy( ptr, txt, size ); +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, ctx.id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + { + TracyQueuePrepareC( tracy::QueueType::ZoneText ); + tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommitC( zoneTextFatThread ); + } +} + +TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ) +{ + assert( size < std::numeric_limits::max() ); + if( !ctx.active ) return; + auto ptr = (char*)tracy::tracy_malloc( size ); + memcpy( ptr, txt, size ); +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, ctx.id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + { + TracyQueuePrepareC( tracy::QueueType::ZoneName ); + tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommitC( zoneTextFatThread ); + } +} + +TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ) { + if( !ctx.active ) return; +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( 
tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, ctx.id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + { + TracyQueuePrepareC( tracy::QueueType::ZoneColor ); + tracy::MemWrite( &item->zoneColor.b, uint8_t( ( color ) & 0xFF ) ); + tracy::MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); + tracy::MemWrite( &item->zoneColor.r, uint8_t( ( color >> 16 ) & 0xFF ) ); + TracyQueueCommitC( zoneColorThread ); + } +} + +TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ) +{ + if( !ctx.active ) return; +#ifndef TRACY_NO_VERIFY + { + TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); + tracy::MemWrite( &item->zoneValidation.id, ctx.id ); + TracyQueueCommitC( zoneValidationThread ); + } +#endif + { + TracyQueuePrepareC( tracy::QueueType::ZoneValue ); + tracy::MemWrite( &item->zoneValue.value, value ); + TracyQueueCommitC( zoneValueThread ); + } +} + +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); + } + else + { + tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); + } + else + { + tracy::Profiler::MemFree( ptr, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ) { tracy::Profiler::MemDiscard( name, secure != 0 ); } +TRACY_API void 
___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemDiscardCallstack( name, secure != 0, depth ); + } + else + { + tracy::Profiler::MemDiscard( name, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); + } + else + { + tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); + } +} +TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); + } + else + { + tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); + } +} +TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } +TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } +TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } +TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip != 0 ); } +TRACY_API void ___tracy_emit_plot( const char* name, double val ) { 
tracy::Profiler::PlotData( name, val ); } +TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); } +TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); } +TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step != 0, fill != 0, color ); } +TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ) { tracy::Profiler::Message( txt, size, callstack_depth ); } +TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ) { tracy::Profiler::Message( txt, callstack_depth ); } +TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, size, color, callstack_depth ); } +TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, color, callstack_depth ); } +TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } + +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, color ); +} + +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); +} + +TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuZoneBegin ); + 
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); + tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) +{ + tracy::GetProfiler().SendCallstack( data.depth ); + TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginCallstack ); + tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); + tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLoc ); + tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); + tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) +{ + tracy::GetProfiler().SendCallstack( data.depth ); + TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLocCallstack ); + tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); + tracy::MemWrite( 
&item->gpuZoneBegin.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuTime ); + tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); + tracy::MemWrite( &item->gpuTime.context, data.context ); + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuZoneEnd ); + tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); + memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); + tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_new_context( ___tracy_gpu_new_context_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuNewContext ); + tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuNewContext.period, data.period ); + tracy::MemWrite( &item->gpuNewContext.context, data.context ); + tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); + tracy::MemWrite( &item->gpuNewContext.type, data.type ); + +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data data ) +{ + auto ptr = (char*)tracy::tracy_malloc( data.len ); + memcpy( ptr, data.name, data.len ); + + TracyLfqPrepareC( tracy::QueueType::GpuContextName ); + tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); + tracy::MemWrite( &item->gpuContextNameFat.ptr, 
(uint64_t)ptr ); + tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); + +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuCalibration ); + tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); + tracy::MemWrite( &item->gpuCalibration.context, data.context ); + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuTimeSync ); + tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTimeSync.context, data.context ); + TracyLfqCommitC; +} + +TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial ); + tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); + tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) +{ + auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginCallstackSerial ); + tracy::MemWrite( &item->gpuZoneBegin.cpuTime, 
tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); + tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data data ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial ); + tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); + tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) +{ + auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); + tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); + tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data data ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime ); + tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); + 
tracy::MemWrite( &item->gpuTime.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial ); + tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); + memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); + tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); + tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_new_context_serial( ___tracy_gpu_new_context_data data ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext ); + tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuNewContext.period, data.period ); + tracy::MemWrite( &item->gpuNewContext.context, data.context ); + tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); + tracy::MemWrite( &item->gpuNewContext.type, data.type ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data data ) +{ + auto ptr = (char*)tracy::tracy_malloc( data.len ); + memcpy( ptr, data.name, data.len ); + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName ); + tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); + tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct 
___tracy_gpu_calibration_data data ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration ); + tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); + tracy::MemWrite( &item->gpuCalibration.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data data ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTimeSync ); + tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTimeSync.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +struct __tracy_lockable_context_data +{ + uint32_t m_id; +#ifdef TRACY_ON_DEMAND + std::atomic m_lockCount; + std::atomic m_active; +#endif +}; + +TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ) +{ + struct __tracy_lockable_context_data *lockdata = (__tracy_lockable_context_data*)tracy::tracy_malloc( sizeof( __tracy_lockable_context_data ) ); + lockdata->m_id =tracy:: GetLockCounter().fetch_add( 1, std::memory_order_relaxed ); +#ifdef TRACY_ON_DEMAND + new(&lockdata->m_lockCount) std::atomic( 0 ); + new(&lockdata->m_active) std::atomic( false ); +#endif + assert( lockdata->m_id != (std::numeric_limits::max)() ); + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockAnnounce ); + tracy::MemWrite( &item->lockAnnounce.id, lockdata->m_id ); + tracy::MemWrite( &item->lockAnnounce.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); + tracy::MemWrite( 
&item->lockAnnounce.type, tracy::LockType::Lockable ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); + + return lockdata; +} + +TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockTerminate ); + tracy::MemWrite( &item->lockTerminate.id, lockdata->m_id ); + tracy::MemWrite( &item->lockTerminate.time, tracy::Profiler::GetTime() ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); + +#ifdef TRACY_ON_DEMAND + lockdata->m_lockCount.~atomic(); + lockdata->m_active.~atomic(); +#endif + tracy::tracy_free((void*)lockdata); +} + +TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ +#ifdef TRACY_ON_DEMAND + bool queue = false; + const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( locks == 0 || active ) + { + const bool connected = tracy::GetProfiler().IsConnected(); + if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); + if( connected ) queue = true; + } + if( !queue ) return static_cast(false); +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockWait ); + tracy::MemWrite( &item->lockWait.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockWait.id, lockdata->m_id ); + tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); + return static_cast(true); +} + +TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain 
); + tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); + tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ +#ifdef TRACY_ON_DEMAND + lockdata->m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); + if( !lockdata->m_active.load( std::memory_order_relaxed ) ) return; + if( !tracy::GetProfiler().IsConnected() ) + { + lockdata->m_active.store( false, std::memory_order_relaxed ); + return; + } +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockRelease ); + tracy::MemWrite( &item->lockRelease.id, lockdata->m_id ); + tracy::MemWrite( &item->lockRelease.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired ) +{ +#ifdef TRACY_ON_DEMAND + if( !acquired ) return; + + bool queue = false; + const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( locks == 0 || active ) + { + const bool connected = tracy::GetProfiler().IsConnected(); + if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); + if( connected ) queue = true; + } + if( !queue ) return; +#endif + + if( acquired ) + { + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); + tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); + tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); + } +} + +TRACY_API void ___tracy_mark_lockable_ctx( 
struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ) +{ +#ifdef TRACY_ON_DEMAND + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( !active ) return; + const auto connected = tracy::GetProfiler().IsConnected(); + if( !connected ) + { + if( active ) lockdata->m_active.store( false, std::memory_order_relaxed ); + return; + } +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockMark ); + tracy::MemWrite( &item->lockMark.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockMark.id, lockdata->m_id ); + tracy::MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ) +{ + assert( nameSz < (std::numeric_limits::max)() ); + auto ptr = (char*)tracy::tracy_malloc( nameSz ); + memcpy( ptr, name, nameSz ); + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockName ); + tracy::MemWrite( &item->lockNameFat.id, lockdata->m_id ); + tracy::MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); + tracy::MemWrite( &item->lockNameFat.size, (uint16_t)nameSz ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API int32_t ___tracy_connected( void ) +{ + return static_cast( tracy::GetProfiler().IsConnected() ); +} + +#ifdef TRACY_FIBERS +TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber, 0 ); } +TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } +#endif + +# if defined TRACY_MANUAL_LIFETIME && defined TRACY_DELAYED_INIT +TRACY_API void ___tracy_startup_profiler( void ) +{ + tracy::StartupProfiler(); +} + +TRACY_API void ___tracy_shutdown_profiler( void ) +{ + 
tracy::ShutdownProfiler(); +} + +TRACY_API int32_t ___tracy_profiler_started( void ) +{ + return static_cast( tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ) ); +} +# endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/public/client/TracyProfiler.hpp b/public/client/TracyProfiler.hpp index 368889763c..7326af0462 100644 --- a/public/client/TracyProfiler.hpp +++ b/public/client/TracyProfiler.hpp @@ -1067,7 +1067,7 @@ class Profiler #if defined _WIN32 void* m_prevHandler; - #if defined __clang__ || defined __GNUC__ + #if defined __GNUC__ int m_pipe[2]; int m_pipeBufSize; #endif diff --git a/public/client/tracy_concurrentqueue.h b/public/client/tracy_concurrentqueue.h index 693d3b2c6b..4178d39ead 100644 --- a/public/client/tracy_concurrentqueue.h +++ b/public/client/tracy_concurrentqueue.h @@ -34,7 +34,7 @@ #include "../common/TracyForceInline.hpp" #include "../common/TracySystem.hpp" -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings // upon assigning any computed values) @@ -64,7 +64,7 @@ namespace tracy // Compiler-specific likely/unlikely hints namespace moodycamel { namespace details { -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) inline bool cqLikely(bool x) { return __builtin_expect((x), true); } inline bool cqUnlikely(bool x) { return __builtin_expect((x), false); } #else @@ -1436,6 +1436,6 @@ inline void swap(ConsumerToken& a, ConsumerToken& b) noexcept } /* namespace tracy */ -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) #pragma GCC diagnostic pop #endif diff --git a/public/common/TracyForceInline.hpp b/public/common/TracyForceInline.hpp index 3803e96e0b..b6a5833e58 100644 --- a/public/common/TracyForceInline.hpp +++ b/public/common/TracyForceInline.hpp @@ -1,7 +1,7 @@ #ifndef __TRACYFORCEINLINE_HPP__ #define 
__TRACYFORCEINLINE_HPP__ -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) # define tracy_force_inline __attribute__((always_inline)) inline #elif defined(_MSC_VER) # define tracy_force_inline __forceinline @@ -9,7 +9,7 @@ # define tracy_force_inline inline #endif -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) # define tracy_no_inline __attribute__((noinline)) #elif defined(_MSC_VER) # define tracy_no_inline __declspec(noinline) diff --git a/public/common/tracy_lz4.cpp b/public/common/tracy_lz4.cpp index 7e78da9ddf..bb032bfc73 100644 --- a/public/common/tracy_lz4.cpp +++ b/public/common/tracy_lz4.cpp @@ -74,11 +74,11 @@ * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ -# if (defined(__GNUC__) || defined(__clang__)) && \ +# if defined(__GNUC__) && \ ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define LZ4_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || (defined(__GNUC__) || defined(__clang__)) +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) # define LZ4_FORCE_MEMORY_ACCESS 1 # endif #endif diff --git a/server/TracyFileRead.hpp b/server/TracyFileRead.hpp index 10c62b06f1..5404ee342d 100644 --- a/server/TracyFileRead.hpp +++ b/server/TracyFileRead.hpp @@ -15,7 +15,7 @@ #include #include -#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +#if defined _MSC_VER || (defined _WIN32 && defined __GNUC__) // MSCV, gcc and clang compilers contain _stat64 # define stat64 _stat64 #endif diff --git a/server/TracyPopcnt.hpp b/server/TracyPopcnt.hpp index 945324fd91..3aa8308680 100644 --- a/server/TracyPopcnt.hpp +++ b/server/TracyPopcnt.hpp @@ -8,7 +8,7 @@ # include # define TracyCountBits __popcnt64 # define TracyLzcnt __lzcnt64 -#elif defined __GNUC__ || defined __clang__ 
+#elif defined __GNUC__ static inline uint64_t TracyCountBits( uint64_t i ) { return uint64_t( __builtin_popcountll( i ) ); diff --git a/server/TracyPrint.cpp b/server/TracyPrint.cpp index 95029f557e..8d3606ba4f 100644 --- a/server/TracyPrint.cpp +++ b/server/TracyPrint.cpp @@ -1,7 +1,7 @@ #ifdef _MSC_VER # pragma warning( disable: 4244 ) // conversion from don't care to whatever, possible loss of data #endif -#if defined __MINGW32__ || defined __GNUC__ || defined __clang__ +#if defined __MINGW32__ || defined __GNUC__ # define __STDC_FORMAT_MACROS #endif diff --git a/server/TracyPrint.hpp b/server/TracyPrint.hpp index ed9b62bb8a..80b087e46f 100644 --- a/server/TracyPrint.hpp +++ b/server/TracyPrint.hpp @@ -16,7 +16,7 @@ # define NO_CHARCONV #endif -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ # define NO_CHARCONV #endif diff --git a/server/tracy_xxhash.h b/server/tracy_xxhash.h index 0b0f8d57a9..02438fcb73 100644 --- a/server/tracy_xxhash.h +++ b/server/tracy_xxhash.h @@ -260,7 +260,7 @@ extern "C" { # define XXH_STATIC_LINKING_ONLY /* make all functions private */ # undef XXH_PUBLIC_API -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define XXH_PUBLIC_API static inline @@ -373,15 +373,15 @@ extern "C" { /*! @brief Marks a global symbol. 
*/ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && (defined(_MSC_VER) || defined(__clang__) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# if defined(WIN32) && (defined(_MSC_VER) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # __attribute__((dllexport)) # elif # define XXH_PUBLIC_API __declspec(dllexport) # endif # elif XXH_IMPORT -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # __attribute__((dllimport)) # elif # define XXH_PUBLIC_API __declspec(dllimport) @@ -457,15 +457,15 @@ extern "C" { /* specific declaration modes for Windows */ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && (defined(_MSC_VER) || defined(__clang__) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# if defined(WIN32) && (defined(_MSC_VER) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # __attribute__((dllexport)) # elif # define XXH_PUBLIC_API __declspec(dllexport) # endif # elif XXH_IMPORT -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # __attribute__((dllimport)) # elif # define XXH_PUBLIC_API __declspec(dllimport) @@ -476,7 +476,7 @@ extern "C" { # endif #endif -#if defined (__GNUC__) || defined (__clang__) +#if defined (__GNUC__) # define XXH_CONSTF __attribute__((const)) # define XXH_PUREF __attribute__((pure)) # define XXH_MALLOCF __attribute__((malloc)) @@ -1494,7 +1494,7 @@ struct XXH64_state_s { #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */ /* In C++ alignas() is a keyword */ # define XXH_ALIGN(n) alignas(n) -#elif defined(__GNUC__) || defined(__clang__) +#elif defined(__GNUC__) # define XXH_ALIGN(n) __attribute__ ((aligned(n))) #elif defined(_MSC_VER) # define XXH_ALIGN(n) 
__declspec(align(n)) @@ -1505,7 +1505,7 @@ struct XXH64_state_s { /* Old GCC versions only accept the attribute after the type in structures. */ #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \ - && (defined(__GNUC__) || defined(__clang__)) + && defined(__GNUC__) # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) #else # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type @@ -1999,7 +1999,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, /* prefer __packed__ structures (method 1) for GCC * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy * which for some reason does unaligned loads. */ -# if (defined(__GNUC__) || defined(__clang__)) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) +# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif diff --git a/test/stb_image.h b/test/stb_image.h index 4c03e4a8d1..7a8b294fcd 100644 --- a/test/stb_image.h +++ b/test/stb_image.h @@ -728,7 +728,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; #ifdef _MSC_VER -#if defined (_MSC_VER && _MSC_VER >= 1400) // not VC6 +#if _MSC_VER >= 1400 // not VC6 #include // __cpuid static int stbi__cpuid3(void) { From c504eb872d06fbf07b436eafc3453b5aa668e5a8 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 23:01:08 +0200 Subject: [PATCH 05/15] config.cmake fix --- cmake/config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 1bdb25905e..4874684224 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -47,7 +47,7 @@ message("compiler = ${CMAKE_C_COMPILER_ID}") if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) # gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp - if(WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) + if(NOT WIN32 OR (WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"))) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) endif() endif() From b07ac82131e9579c2032662d4320fdc937b2f0fe Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 23:23:16 +0200 Subject: [PATCH 06/15] Removal of test line of code --- cmake/config.cmake | 2 -- 1 file changed, 2 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 4874684224..ec655cbcff 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -43,8 +43,6 @@ if(EMSCRIPTEN) add_compile_options(-pthread -DIMGUI_IMPL_OPENGL_ES2) endif() -message("compiler = ${CMAKE_C_COMPILER_ID}") - if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) # gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp if(NOT WIN32 OR (WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"))) From 32c9a046e7d79bac450a86b69fe670e63b80b779 Mon Sep 17 00:00:00 2001 From: Filip <> Date: 
Sun, 1 Jun 2025 00:35:48 +0200 Subject: [PATCH 07/15] GCC and Clang compatible on Windows Rebase --- cmake/config.cmake | 10 ++++++++-- cmake/server.cmake | 3 +++ public/client/TracyProfiler.cpp | 2 ++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index d563c225a1..58e815ec98 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -12,7 +12,11 @@ if (NOT NO_ISA_EXTENSIONS) endif() endif() if(WIN32) - add_compile_options(/arch:AVX2) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") + add_compile_options(-mavx2) + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/arch:AVX2) + endif() endif() endif() @@ -30,7 +34,9 @@ endif() if(WIN32) add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR) - add_compile_options(/MP) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/MP) + endif() endif() if(EMSCRIPTEN) diff --git a/cmake/server.cmake b/cmake/server.cmake index a76d1c1340..cc3a9a924a 100644 --- a/cmake/server.cmake +++ b/cmake/server.cmake @@ -30,6 +30,9 @@ list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/") add_library(TracyServer STATIC EXCLUDE_FROM_ALL ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES}) target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR}) target_link_libraries(TracyServer PUBLIC TracyCapstone libzstd PPQSort::PPQSort) +if(WIN32) + target_link_libraries(TracyServer PRIVATE Ws2_32) +endif() if(NO_STATISTICS) target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS) endif() diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 22830765e5..76bae1dbfb 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -3132,6 +3132,7 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); #ifdef _WIN32 + # ifdef _MSC_VER __try { @@ -3144,6 +3145,7 
@@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) # else memcpy( buf, data, size ); # endif + #else // Send through the pipe to ensure safe reads for( size_t offset = 0; offset != size; /*in loop*/ ) From 5ba7c1bb472d40c03ec45c9cdd47b9f1d9d3007e Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 18:20:13 +0200 Subject: [PATCH 08/15] Compilation of profiler and project that uses Tracy now works with gcc and clang compilers on Windows. Rebase --- cmake/config.cmake | 5 +- import/src/import-chrome.cpp | 4 +- import/src/import-fuchsia.cpp | 5 +- import/src/json.hpp | 18 +- profiler/src/main.cpp | 8 + profiler/src/profiler/TracyView_Timeline.cpp | 8 + profiler/src/stb_image.h | 363 ++++++++++--------- public/TracyClient.cpp | 1 + public/client/TracyProfiler.cpp | 137 +++++-- public/client/TracyProfiler.hpp | 4 + public/client/tracy_concurrentqueue.h | 6 +- public/common/TracyForceInline.hpp | 4 +- public/common/TracySocket.cpp | 1 + public/common/tracy_lz4.cpp | 10 +- public/common/tracy_lz4.hpp | 2 +- public/common/tracy_lz4hc.cpp | 3 + server/TracyFileRead.hpp | 3 +- server/TracyPrint.cpp | 2 +- server/TracyPrint.hpp | 2 +- server/tracy_robin_hood.h | 1 + server/tracy_xxhash.h | 52 ++- test/stb_image.h | 17 +- 22 files changed, 398 insertions(+), 258 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 58e815ec98..ad238cae4c 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -44,7 +44,10 @@ if(EMSCRIPTEN) endif() if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) + # Mingw gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp + if(NOT (MINGW OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang"))) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) + endif() 
endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") diff --git a/import/src/import-chrome.cpp b/import/src/import-chrome.cpp index 936d37d936..5a3040c338 100644 --- a/import/src/import-chrome.cpp +++ b/import/src/import-chrome.cpp @@ -11,9 +11,11 @@ #include #include -#ifdef _MSC_VER +#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +// all checked compilers contain _stat64 # define stat64 _stat64 #endif + #if defined __APPLE__ # define stat64 stat #endif diff --git a/import/src/import-fuchsia.cpp b/import/src/import-fuchsia.cpp index 785979c198..854273b2cd 100644 --- a/import/src/import-fuchsia.cpp +++ b/import/src/import-fuchsia.cpp @@ -20,8 +20,9 @@ #include #include -#ifdef _MSC_VER -#define stat64 _stat64 +#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +// all checked compilers contain _stat64 +# define stat64 _stat64 #endif #if defined __APPLE__ #define stat64 stat diff --git a/import/src/json.hpp b/import/src/json.hpp index 8b72ea6539..cca1a99925 100644 --- a/import/src/json.hpp +++ b/import/src/json.hpp @@ -16799,7 +16799,7 @@ class binary_writer void write_compact_float(const number_float_t n, detail::input_format_t format) { -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -16819,7 +16819,7 @@ class binary_writer : get_msgpack_float_prefix(n)); write_number(n); } -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic pop #endif } @@ -17981,7 +17981,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '-'; } -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -17993,7 +17993,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '0'; return first; } -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC 
diagnostic pop #endif @@ -21187,7 +21187,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::negation>, detail::negation>, detail::negation>>, -#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) +#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || defined(__clang__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) detail::negation>, #endif #if defined(JSON_HAS_CPP_17) && JSON_HAS_STATIC_RTTI @@ -22983,13 +22983,13 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ bool operator==(const_reference rhs) const noexcept { -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif const_reference lhs = *this; JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic pop #endif } @@ -23087,12 +23087,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ friend bool operator==(const_reference lhs, const_reference rhs) noexcept { -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ #pragma GCC diagnostic pop #endif } diff --git a/profiler/src/main.cpp b/profiler/src/main.cpp index b10a668003..0b6d34cae8 100644 --- a/profiler/src/main.cpp +++ b/profiler/src/main.cpp @@ -19,6 +19,14 @@ # include #endif +#if defined _WIN32 && (defined __GNUC__ && !defined __clang__) +//_WIN32 - only tested on windows + +// gcc throws error for not present std::pow function, +// clang does not have that problem +#include +#endif + #define 
STB_IMAGE_IMPLEMENTATION #define STBI_ONLY_PNG #include "stb_image.h" diff --git a/profiler/src/profiler/TracyView_Timeline.cpp b/profiler/src/profiler/TracyView_Timeline.cpp index a8c8b9e976..0e9768beb5 100644 --- a/profiler/src/profiler/TracyView_Timeline.cpp +++ b/profiler/src/profiler/TracyView_Timeline.cpp @@ -10,6 +10,14 @@ #include "TracyTimelineItemThread.hpp" #include "TracyView.hpp" +#if defined _WIN32 && (defined __GNUC__ && !defined __clang__) +//_WIN32 - only tested on windows + +// gcc throws error for not present std::pow function, +// clang does not have that problem +#include +#endif + namespace tracy { diff --git a/profiler/src/stb_image.h b/profiler/src/stb_image.h index a632d54351..d0c064bb36 100644 --- a/profiler/src/stb_image.h +++ b/profiler/src/stb_image.h @@ -1,4 +1,4 @@ -/* stb_image - v2.29 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb no warranty implied; use at your own risk Do this: @@ -48,7 +48,6 @@ LICENSE RECENT REVISION HISTORY: - 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes @@ -631,7 +630,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #endif #ifndef STBI_THREAD_LOCAL - #if defined(__GNUC__) + #if defined(__GNUC__) && !defined(__clang__) #define STBI_THREAD_LOCAL __thread #endif #endif @@ -659,12 +658,15 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; #define STBI_NOTUSED(v) (void)sizeof(v) #endif -#ifdef _MSC_VER -#define STBI_HAS_LROTL -#endif -#ifdef STBI_HAS_LROTL +#ifdef _MSC_VER #define stbi_lrot(x,y) _lrotl(x,y) +#elif defined __clang__ + // 32bit version of function as stb image uses this function to rotate 32bit integers + #define stbi_lrot(x,y) __builtin_rotateleft32(x,y) +#elif defined __GNUC__ + // gcc built-in is type-generic with first argument being any unsigned integer and second any signed or unsigned integer or char + #define stbi_lrot(x,y) __builtin_stdc_rotate_left(x,y) #else #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif @@ -726,7 +728,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #ifdef _MSC_VER -#if _MSC_VER >= 1400 // not VC6 +#if defined _MSC_VER && _MSC_VER >= 1400 // not VC6 #include // __cpuid static int stbi__cpuid3(void) { @@ -1073,8 +1075,8 @@ static int stbi__addints_valid(int a, int b) return a <= INT_MAX - b; } -// returns 1 if the product of two ints fits in a signed short, 0 on overflow. -static int stbi__mul2shorts_valid(int a, int b) +// returns 1 if the product of two signed shorts is valid, 0 on overflow. 
+static int stbi__mul2shorts_valid(short a, short b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid @@ -3385,13 +3387,13 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) return 1; } -static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { - stbi_uc x = stbi__get8(j->s); - while (x == 0xff) { // might be a marker + int x = stbi__get8(j->s); + while (x == 255) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { @@ -4177,7 +4179,6 @@ typedef struct { stbi_uc *zbuffer, *zbuffer_end; int num_bits; - int hit_zeof_once; stbi__uint32 code_buffer; char *zout; @@ -4244,20 +4245,9 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) int b,s; if (a->num_bits < 16) { if (stbi__zeof(a)) { - if (!a->hit_zeof_once) { - // This is the first time we hit eof, insert 16 extra padding btis - // to allow us to keep going; if we actually consume any of them - // though, that is invalid data. This is caught later. - a->hit_zeof_once = 1; - a->num_bits += 16; // add 16 implicit zero bits - } else { - // We already inserted our extra 16 padding bits and are again - // out, this stream is actually prematurely terminated. - return -1; - } - } else { - stbi__fill_bits(a); + return -1; /* report error for unexpected end of data. 
*/ } + stbi__fill_bits(a); } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { @@ -4322,13 +4312,6 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) int len,dist; if (z == 256) { a->zout = zout; - if (a->hit_zeof_once && a->num_bits < 16) { - // The first time we hit zeof, we inserted 16 extra zero bits into our bit - // buffer so the decoder can just do its speculative decoding. But if we - // actually consumed any of those bits (which is the case when num_bits < 16), - // the stream actually read past the end so it is malformed. - return stbi__err("unexpected end","Corrupt PNG"); - } return 1; } if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data @@ -4340,7 +4323,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (len > a->zout_end - zout) { + if (zout + len > a->zout_end) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } @@ -4484,7 +4467,6 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; - a->hit_zeof_once = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); @@ -4640,8 +4622,9 @@ enum { STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, - // synthetic filter used for first scanline to avoid needing a dummy row of 0s - STBI__F_avg_first + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first }; static stbi_uc first_row_filter[5] = @@ -4650,56 +4633,29 @@ static stbi_uc first_row_filter[5] = STBI__F_sub, STBI__F_none, STBI__F_avg_first, - STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub + STBI__F_paeth_first }; static int stbi__paeth(int a, int b, int c) { - // This 
formulation looks very different from the reference in the PNG spec, but is - // actually equivalent and has favorable data dependencies and admits straightforward - // generation of branch-free code, which helps performance significantly. - int thresh = c*3 - (a + b); - int lo = a < b ? a : b; - int hi = a < b ? b : a; - int t0 = (hi <= thresh) ? lo : c; - int t1 = (thresh <= lo) ? hi : t0; - return t1; + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; } static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; -// adds an extra all-255 alpha channel -// dest == src is legal -// img_n must be 1 or 3 -static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) -{ - int i; - // must process data backwards since we allow dest==src - if (img_n == 1) { - for (i=x-1; i >= 0; --i) { - dest[i*2+1] = 255; - dest[i*2+0] = src[i]; - } - } else { - STBI_ASSERT(img_n == 3); - for (i=x-1; i >= 0; --i) { - dest[i*4+3] = 255; - dest[i*4+2] = src[i*3+2]; - dest[i*4+1] = src[i*3+1]; - dest[i*4+0] = src[i*3+0]; - } - } -} - // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) { - int bytes = (depth == 16 ? 2 : 1); + int bytes = (depth == 16? 
2 : 1); stbi__context *s = a->s; stbi__uint32 i,j,stride = x*out_n*bytes; stbi__uint32 img_len, img_width_bytes; - stbi_uc *filter_buf; - int all_ok = 1; int k; int img_n = s->img_n; // copy it into a local for later @@ -4711,11 +4667,8 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into if (!a->out) return stbi__err("outofmem", "Out of memory"); - // note: error exits here don't need to clean up a->out individually, - // stbi__do_png always does on error. if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); - if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); img_len = (img_width_bytes + 1) * y; // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, @@ -4723,137 +4676,189 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r // so just check for raw_len < img_len always. if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); - // Allocate two scan lines worth of filter workspace buffer. 
- filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); - if (!filter_buf) return stbi__err("outofmem", "Out of memory"); - - // Filtering for low-bit-depth images - if (depth < 8) { - filter_bytes = 1; - width = img_width_bytes; - } - for (j=0; j < y; ++j) { - // cur/prior filter buffers alternate - stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; - stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; - stbi_uc *dest = a->out + stride*j; - int nk = width * filter_bytes; + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; int filter = *raw++; - // check filter type - if (filter > 4) { - all_ok = stbi__err("invalid filter","Corrupt PNG"); - break; + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; - // perform actual filtering - switch (filter) { - case STBI__F_none: - memcpy(cur, raw, nk); - break; - case STBI__F_sub: - memcpy(cur, raw, filter_bytes); - for (k = filter_bytes; k < nk; ++k) - cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); - break; - case STBI__F_up: - for (k = 0; k < nk; ++k) - cur[k] = STBI__BYTECAST(raw[k] + prior[k]); - break; - case STBI__F_avg: - for (k = 0; k < filter_bytes; ++k) - cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); - for (k = filter_bytes; k < nk; ++k) - cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); - break; - case STBI__F_paeth: - for (k = 0; k < filter_bytes; ++k) - cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) - for (k = filter_bytes; k < nk; ++k) - cur[k] = 
STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); - break; - case STBI__F_avg_first: - memcpy(cur, raw, filter_bytes); - for (k = filter_bytes; k < nk; ++k) - cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); - break; + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } } - raw += nk; + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } - // expand decoded bits in cur to dest, also adding an extra alpha channel if desired - if (depth < 8) { + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
+ case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. 
+ if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range - stbi_uc *in = cur; - stbi_uc *out = dest; - stbi_uc inb = 0; - stbi__uint32 nsmp = x*img_n; - // expand bits to bytes first + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
+ // so we need to explicitly clamp the final ones + if (depth == 4) { - for (i=0; i < nsmp; ++i) { - if ((i & 1) == 0) inb = *in++; - *out++ = scale * (inb >> 4); - inb <<= 4; + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); } + if (k > 0) *cur++ = scale * ((*in >> 4) ); } else if (depth == 2) { - for (i=0; i < nsmp; ++i) { - if ((i & 3) == 0) inb = *in++; - *out++ = scale * (inb >> 6); - inb <<= 2; + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); } - } else { - STBI_ASSERT(depth == 1); - for (i=0; i < nsmp; ++i) { - if ((i & 7) == 0) inb = *in++; - *out++ = scale * (inb >> 7); - inb <<= 1; + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } - - // insert alpha=255 values if desired - if (img_n != out_n) - stbi__create_png_alpha_expand8(dest, dest, x, img_n); - } else if (depth == 8) { - if (img_n == out_n) - memcpy(dest, cur, x*img_n); - else - stbi__create_png_alpha_expand8(dest, cur, x, img_n); - } else if (depth == 16) { - // convert the image data 
from big-endian to platform-native - stbi__uint16 *dest16 = (stbi__uint16*)dest; - stbi__uint32 nsmp = x*img_n; - - if (img_n == out_n) { - for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) - *dest16 = (cur[0] << 8) | cur[1]; - } else { - STBI_ASSERT(img_n+1 == out_n); + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; if (img_n == 1) { - for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { - dest16[0] = (cur[0] << 8) | cur[1]; - dest16[1] = 0xffff; + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; } } else { STBI_ASSERT(img_n == 3); - for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { - dest16[0] = (cur[0] << 8) | cur[1]; - dest16[1] = (cur[2] << 8) | cur[3]; - dest16[2] = (cur[4] << 8) | cur[5]; - dest16[3] = 0xffff; + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; } } } } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. 
+ stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } } - STBI_FREE(filter_buf); - if (!all_ok) return 0; - return 1; } diff --git a/public/TracyClient.cpp b/public/TracyClient.cpp index 6224f48bfe..69a5a6df70 100644 --- a/public/TracyClient.cpp +++ b/public/TracyClient.cpp @@ -51,6 +51,7 @@ #endif #ifdef _MSC_VER +// when gcc and clang linker options will be used # pragma comment(lib, "ws2_32.lib") # pragma comment(lib, "dbghelp.lib") # pragma comment(lib, "advapi32.lib") diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 76bae1dbfb..0b2f610ba2 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -12,6 +12,7 @@ # include "../common/TracyUwp.hpp" # ifndef _MSC_VER # include +# include # endif #else # include @@ -102,7 +103,7 @@ # define TRACY_DELAYED_INIT # endif #else -# ifdef __GNUC__ +# if defined __GNUC__ || defined __clang__ # define init_order( val ) __attribute__ ((init_priority(val))) # else # define init_order(x) @@ -115,6 +116,12 @@ extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); extern "C" typedef char* (WINAPI *t_WineGetVersion)(); extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); + +# if defined __clang__ || defined __GNUC__ + // _WIN32 +# include +#endif + #else # include # include @@ -295,6 +302,10 @@ static bool EnsureReadable( uintptr_t address ) } #endif +#if defined __linux__ + bool +#endif + #ifndef TRACY_DELAYED_INIT struct InitTimeWrapper @@ -1453,6 +1464,12 @@ Profiler::Profiler() s_token_detail = moodycamel::ProducerToken( s_queue ); s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; s_threadHandle = ThreadHandleWrapper { m_mainThread }; +# else + //#error FilipNur 
check if works + // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. + s_token_detail = moodycamel::ProducerToken( s_queue ); + s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; + s_threadHandle = ThreadHandleWrapper { m_mainThread }; # endif #endif @@ -1481,7 +1498,36 @@ Profiler::Profiler() m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); -#ifndef _WIN32 +#if defined _WIN32 && (defined __clang__ || defined __GNUC__) + + m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; + + { // scope for temporary variable originalHandlesCount + int originalHandlesCount = _getmaxstdio(); + + while(_pipe(m_pipe, m_pipeBufSize, _O_BINARY) != 0) + { + if ((errno == EMFILE) || (errno == ENFILE)) + { + // safe upper bound for exceptional situations + if(_getmaxstdio() > (originalHandlesCount + 10)) + { + throw std::runtime_error("Failed to create communication pipe!"); + } + + // as described by Raymond Chen (https://devblogs.microsoft.com/oldnewthing/20070718-00/?p=25963) + // max number of handles in windows is 10000, + // _getmaxstdio() at the start returns 512, so no fear of too much handles + _setmaxstdio(_getmaxstdio() + 1); + } + else + { + m_pipeBufSize /= 2; + } + } + } + +#elif !defined _WIN32 pipe(m_pipe); # if defined __APPLE__ || defined BSD // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default @@ -1646,6 +1692,10 @@ Profiler::~Profiler() #ifndef _WIN32 close( m_pipe[0] ); close( m_pipe[1] ); +#elif defined __clang__ || defined __GNUC__ + // _WIN32 + _close(m_pipe[0]); + _close(m_pipe[1]); #endif tracy_free( m_safeSendBuffer ); @@ -3142,8 +3192,35 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) { success = false; } -# else - memcpy( buf, data, size ); +# elif defined __GNUC__ + // Send through the pipe to ensure safe reads on compilers with no __try/__except + for( size_t offset = 0; offset != size; /*in loop*/ ) + { + size_t sendsize = size - offset; + 
int result1, result2; + + // ENOSPC indicates that there is no more space to execute write operation + // other possible values: + // EBADF - invalid file descriptor or not opened for writing + // EINVAL - null buffer or odd number of bytes in unicode mode + while( ( result1 = _write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno != ENOSPC ) { /* retry */ } + if( result1 < 0 ) + { + success = false; + break; + } + + // EBADF - errno set to this value if pipe is not opened for reading or locked + // other possible values: + // EINVAL - result1 > INT_MAX + while( ( result2 = _read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno != EBADF ) { /* retry */ } + if( result2 != result1 ) + { + success = false; + break; + } + offset += result1; + } # endif #else @@ -3483,32 +3560,32 @@ void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) case SymbolQueueItemType::KernelCode: { #ifdef _WIN32 - auto mod = GetKernelModulePath( si.ptr ); - if( mod ) - { - auto fn = DecodeCallstackPtrFast( si.ptr ); - if( *fn ) - { - auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); - if( hnd ) - { - auto ptr = (const void*)GetProcAddress( hnd, fn ); - if( ptr ) - { - auto buf = (char*)tracy_malloc( si.extra ); - memcpy( buf, ptr, si.extra ); - FreeLibrary( hnd ); - TracyLfqPrepare( QueueType::SymbolCodeMetadata ); - MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); - MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); - MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); - TracyLfqCommit; - break; - } - FreeLibrary( hnd ); - } - } - } + auto mod = GetKernelModulePath( si.ptr ); + if( mod ) + { + auto fn = DecodeCallstackPtrFast( si.ptr ); + if( *fn ) + { + auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); + if( hnd ) + { + auto ptr = (const void*)GetProcAddress( hnd, fn ); + if( ptr ) + { + auto buf = (char*)tracy_malloc( si.extra ); + memcpy( buf, ptr, si.extra ); + FreeLibrary( hnd ); + TracyLfqPrepare( 
QueueType::SymbolCodeMetadata ); + MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); + MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); + MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); + TracyLfqCommit; + break; + } + FreeLibrary( hnd ); + } + } + } #elif defined __linux__ void* data = m_kcore->Retrieve( si.ptr, si.extra ); if( data ) diff --git a/public/client/TracyProfiler.hpp b/public/client/TracyProfiler.hpp index 8d16905860..368889763c 100644 --- a/public/client/TracyProfiler.hpp +++ b/public/client/TracyProfiler.hpp @@ -1067,6 +1067,10 @@ class Profiler #if defined _WIN32 void* m_prevHandler; + #if defined __clang__ || defined __GNUC__ + int m_pipe[2]; + int m_pipeBufSize; + #endif #else int m_pipe[2]; int m_pipeBufSize; diff --git a/public/client/tracy_concurrentqueue.h b/public/client/tracy_concurrentqueue.h index 4178d39ead..693d3b2c6b 100644 --- a/public/client/tracy_concurrentqueue.h +++ b/public/client/tracy_concurrentqueue.h @@ -34,7 +34,7 @@ #include "../common/TracyForceInline.hpp" #include "../common/TracySystem.hpp" -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings // upon assigning any computed values) @@ -64,7 +64,7 @@ namespace tracy // Compiler-specific likely/unlikely hints namespace moodycamel { namespace details { -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) inline bool cqLikely(bool x) { return __builtin_expect((x), true); } inline bool cqUnlikely(bool x) { return __builtin_expect((x), false); } #else @@ -1436,6 +1436,6 @@ inline void swap(ConsumerToken& a, ConsumerToken& b) noexcept } /* namespace tracy */ -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif diff --git a/public/common/TracyForceInline.hpp b/public/common/TracyForceInline.hpp index 
b6a5833e58..3803e96e0b 100644 --- a/public/common/TracyForceInline.hpp +++ b/public/common/TracyForceInline.hpp @@ -1,7 +1,7 @@ #ifndef __TRACYFORCEINLINE_HPP__ #define __TRACYFORCEINLINE_HPP__ -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) # define tracy_force_inline __attribute__((always_inline)) inline #elif defined(_MSC_VER) # define tracy_force_inline __forceinline @@ -9,7 +9,7 @@ # define tracy_force_inline inline #endif -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) # define tracy_no_inline __attribute__((noinline)) #elif defined(_MSC_VER) # define tracy_no_inline __declspec(noinline) diff --git a/public/common/TracySocket.cpp b/public/common/TracySocket.cpp index bdba361965..6938fcff4a 100644 --- a/public/common/TracySocket.cpp +++ b/public/common/TracySocket.cpp @@ -22,6 +22,7 @@ # endif # define poll WSAPoll # ifdef _MSC_VER + // for gcc and clang added with linker options # pragma comment(lib, "ws2_32.lib") # endif #else diff --git a/public/common/tracy_lz4.cpp b/public/common/tracy_lz4.cpp index 15d0990f82..7e78da9ddf 100644 --- a/public/common/tracy_lz4.cpp +++ b/public/common/tracy_lz4.cpp @@ -74,11 +74,11 @@ * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ -# if defined(__GNUC__) && \ +# if (defined(__GNUC__) || defined(__clang__)) && \ ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define LZ4_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || (defined(__GNUC__) || defined(__clang__)) # define LZ4_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -345,7 +345,9 @@ namespace tracy * environments. This is needed when decompressing the Linux Kernel, for example. 
*/ #if !defined(LZ4_memcpy) -# if defined(__GNUC__) && (__GNUC__ >= 4) +# if defined(__clang__) +# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +# elif defined(__GNUC__) && (__GNUC__ >= 4) # define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) # else # define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) @@ -1283,7 +1285,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( } else { *op++ = (BYTE)(lastRun<= 4) +# if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) # define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) # else # define LZ4LIB_VISIBILITY diff --git a/public/common/tracy_lz4hc.cpp b/public/common/tracy_lz4hc.cpp index eec7239e05..bd0199353c 100644 --- a/public/common/tracy_lz4hc.cpp +++ b/public/common/tracy_lz4hc.cpp @@ -162,7 +162,10 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, #if defined(_MSC_VER) # define LZ4HC_rotl32(x,r) _rotl(x,r) +#elif defined(__clang__) +# define LZ4HC_rotl32(x,r) __builtin_rotateleft32(x,r) #else +// gcc does not provide builtin rotate left funciton for C++ (__builtin_stdc_rotate_left is available only in C) # define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) #endif diff --git a/server/TracyFileRead.hpp b/server/TracyFileRead.hpp index 988c7ae17e..10c62b06f1 100644 --- a/server/TracyFileRead.hpp +++ b/server/TracyFileRead.hpp @@ -15,7 +15,8 @@ #include #include -#ifdef _MSC_VER +#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +// MSCV, gcc and clang compilers contain _stat64 # define stat64 _stat64 #endif #if defined __APPLE__ || defined __FreeBSD__ diff --git a/server/TracyPrint.cpp b/server/TracyPrint.cpp index 9111ddb0bd..95029f557e 100644 --- a/server/TracyPrint.cpp +++ b/server/TracyPrint.cpp @@ -1,7 +1,7 @@ #ifdef _MSC_VER # pragma warning( disable: 4244 ) // conversion from don't care to whatever, possible loss of data #endif -#ifdef __MINGW32__ +#if defined __MINGW32__ || defined __GNUC__ || defined 
__clang__ # define __STDC_FORMAT_MACROS #endif diff --git a/server/TracyPrint.hpp b/server/TracyPrint.hpp index d38245e359..ed9b62bb8a 100644 --- a/server/TracyPrint.hpp +++ b/server/TracyPrint.hpp @@ -16,7 +16,7 @@ # define NO_CHARCONV #endif -#ifdef __GNUC__ +#if defined __GNUC__ || defined __clang__ # define NO_CHARCONV #endif diff --git a/server/tracy_robin_hood.h b/server/tracy_robin_hood.h index 65f6fc1146..6dbcb9e82b 100644 --- a/server/tracy_robin_hood.h +++ b/server/tracy_robin_hood.h @@ -818,6 +818,7 @@ struct hash::value>::type> { } #if defined(__GNUC__) && !defined(__clang__) +// clang does not recognize -Wuseless-cast option # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wuseless-cast" #endif diff --git a/server/tracy_xxhash.h b/server/tracy_xxhash.h index a18e8c762d..0b0f8d57a9 100644 --- a/server/tracy_xxhash.h +++ b/server/tracy_xxhash.h @@ -260,7 +260,7 @@ extern "C" { # define XXH_STATIC_LINKING_ONLY /* make all functions private */ # undef XXH_PUBLIC_API -# if defined(__GNUC__) +# if defined(__GNUC__) || defined(__clang__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define XXH_PUBLIC_API static inline @@ -373,11 +373,19 @@ extern "C" { /*! @brief Marks a global symbol. 
*/ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# if defined(WIN32) && (defined(_MSC_VER) || defined(__clang__) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) +# if defined(__GNUC__) || defined(__clang__) +# __attribute__((dllexport)) +# elif +# define XXH_PUBLIC_API __declspec(dllexport) +# endif # elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) +# if defined(__GNUC__) || defined(__clang__) +# __attribute__((dllimport)) +# elif +# define XXH_PUBLIC_API __declspec(dllimport) +# endif # endif # else # define XXH_PUBLIC_API /* do nothing */ @@ -449,18 +457,26 @@ extern "C" { /* specific declaration modes for Windows */ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# if defined(WIN32) && (defined(_MSC_VER) || defined(__clang__) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) +# if defined(__GNUC__) || defined(__clang__) +# __attribute__((dllexport)) +# elif +# define XXH_PUBLIC_API __declspec(dllexport) +# endif # elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) +# if defined(__GNUC__) || defined(__clang__) +# __attribute__((dllimport)) +# elif +# define XXH_PUBLIC_API __declspec(dllimport) +# endif # endif # else # define XXH_PUBLIC_API /* do nothing */ # endif #endif -#if defined (__GNUC__) +#if defined (__GNUC__) || defined (__clang__) # define XXH_CONSTF __attribute__((const)) # define XXH_PUREF __attribute__((pure)) # define XXH_MALLOCF __attribute__((malloc)) @@ -1478,7 +1494,7 @@ struct XXH64_state_s { #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */ /* In C++ alignas() is a keyword */ # define XXH_ALIGN(n) alignas(n) -#elif 
defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) # define XXH_ALIGN(n) __attribute__ ((aligned(n))) #elif defined(_MSC_VER) # define XXH_ALIGN(n) __declspec(align(n)) @@ -1489,7 +1505,7 @@ struct XXH64_state_s { /* Old GCC versions only accept the attribute after the type in structures. */ #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \ - && defined(__GNUC__) + && (defined(__GNUC__) || defined(__clang__)) # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) #else # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type @@ -1983,7 +1999,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, /* prefer __packed__ structures (method 1) for GCC * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy * which for some reason does unaligned loads. */ -# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) +# if (defined(__GNUC__) || defined(__clang__)) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -2413,6 +2429,7 @@ static int XXH_isLittleEndian(void) */ #if XXH_HAS_BUILTIN(__builtin_unreachable) +// gcc and clang should have this as builtin # define XXH_UNREACHABLE() __builtin_unreachable() #elif defined(_MSC_VER) @@ -2443,13 +2460,14 @@ static int XXH_isLittleEndian(void) */ #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ && XXH_HAS_BUILTIN(__builtin_rotateleft64) -# define XXH_rotl32 __builtin_rotateleft32 -# define XXH_rotl64 __builtin_rotateleft64 +# define XXH_rotl32(x,r) __builtin_rotateleft32(x,r) +# define XXH_rotl64(x,r) __builtin_rotateleft64(x,r) /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ #elif defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) # define 
XXH_rotl64(x,r) _rotl64(x,r) #else +// gcc does not provide builtin rotate left funciton for C++ (__builtin_stdc_rotate_left is available only in C) # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) #endif @@ -2464,7 +2482,8 @@ static int XXH_isLittleEndian(void) */ #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap32 _byteswap_ulong -#elif XXH_GCC_VERSION >= 403 +#elif XXH_GCC_VERSION >= 403 || defined __clang__ +// XXH_GCC_VERSION >= 403 should be equivalent to if defined __GNUC__ # define XXH_swap32 __builtin_bswap32 #else static xxh_u32 XXH_swap32 (xxh_u32 x) @@ -3012,7 +3031,8 @@ static xxh_u64 XXH_read64(const void* memPtr) #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap64 _byteswap_uint64 -#elif XXH_GCC_VERSION >= 403 +#elif XXH_GCC_VERSION >= 403 || defined __clang__ +// XXH_GCC_VERSION >= 403 should be equivalent to if defined __GNUC__ # define XXH_swap64 __builtin_bswap64 #else static xxh_u64 XXH_swap64(xxh_u64 x) @@ -3953,7 +3973,7 @@ do { \ # elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# elif ( defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) ) || defined (__clang__) # define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) # else # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ diff --git a/test/stb_image.h b/test/stb_image.h index 5e807a0a6e..4c03e4a8d1 100644 --- a/test/stb_image.h +++ b/test/stb_image.h @@ -621,7 +621,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #ifndef STBI_NO_THREAD_LOCALS #if defined(__cplusplus) && 
__cplusplus >= 201103L #define STBI_THREAD_LOCAL thread_local - #elif defined(__GNUC__) && __GNUC__ < 5 + #elif defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 5 #define STBI_THREAD_LOCAL __thread #elif defined(_MSC_VER) #define STBI_THREAD_LOCAL __declspec(thread) @@ -630,7 +630,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #endif #ifndef STBI_THREAD_LOCAL - #if defined(__GNUC__) + #if defined(__GNUC__) && !defined(__clang__) #define STBI_THREAD_LOCAL __thread #endif #endif @@ -658,12 +658,15 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #define STBI_NOTUSED(v) (void)sizeof(v) #endif -#ifdef _MSC_VER -#define STBI_HAS_LROTL -#endif -#ifdef STBI_HAS_LROTL +#ifdef _MSC_VER #define stbi_lrot(x,y) _lrotl(x,y) +#elif defined __clang__ + // 32bit version of function as stb image uses this function to rotate 32bit integers + #define stbi_lrot(x,y) __builtin_rotateleft32(x,y) +#elif defined __GNUC__ + // gcc built-in is type-generic with first argument being any unsigned integer and second any signed or unsigned integer or char + #define stbi_lrot(x,y) __builtin_stdc_rotate_left(x,y) #else #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif @@ -725,7 +728,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; #ifdef _MSC_VER -#if _MSC_VER >= 1400 // not VC6 +#if defined (_MSC_VER && _MSC_VER >= 1400) // not VC6 #include // __cpuid static int stbi__cpuid3(void) { From 5b8a0fb27411d307842742b53de73f6443c10db1 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 18:39:09 +0200 Subject: [PATCH 09/15] Minor bug fix --- cmake/config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index ad238cae4c..691a241fb9 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -45,7 +45,7 @@ endif() if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) # Mingw gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp - if(NOT (MINGW OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang"))) + if(NOT (MINGW OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) endif() endif() From a86369ac58d97698653b4f98f7428579318296c7 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 22:43:50 +0200 Subject: [PATCH 10/15] Further tweaks and fixes --- cmake/config.cmake | 6 +- import/src/import-chrome.cpp | 2 +- import/src/import-fuchsia.cpp | 2 +- import/src/json.hpp | 18 +- profiler/src/main.cpp | 8 - profiler/src/profiler/TracyView_Timeline.cpp | 8 - profiler/src/stb_image.h | 353 +++++++++---------- public/TracyClient.cpp | 2 +- public/client/TracyProfiler.cpp | 10 +- public/client/TracyProfiler.hpp | 2 +- public/client/tracy_concurrentqueue.h | 6 +- public/common/TracyForceInline.hpp | 4 +- public/common/tracy_lz4.cpp | 4 +- server/TracyFileRead.hpp | 2 +- server/TracyPopcnt.hpp | 2 +- server/TracyPrint.cpp | 2 
+- server/TracyPrint.hpp | 2 +- server/tracy_xxhash.h | 22 +- test/stb_image.h | 2 +- 19 files changed, 220 insertions(+), 237 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 691a241fb9..1bdb25905e 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -43,9 +43,11 @@ if(EMSCRIPTEN) add_compile_options(-pthread -DIMGUI_IMPL_OPENGL_ES2) endif() +message("compiler = ${CMAKE_C_COMPILER_ID}") + if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) - # Mingw gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp - if(NOT (MINGW OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) + # gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp + if(WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) endif() endif() diff --git a/import/src/import-chrome.cpp b/import/src/import-chrome.cpp index 5a3040c338..6f20d8de84 100644 --- a/import/src/import-chrome.cpp +++ b/import/src/import-chrome.cpp @@ -11,7 +11,7 @@ #include #include -#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +#if defined _MSC_VER || (defined _WIN32 && defined __GNUC__) // all checked compilers contain _stat64 # define stat64 _stat64 #endif diff --git a/import/src/import-fuchsia.cpp b/import/src/import-fuchsia.cpp index 854273b2cd..7c87211596 100644 --- a/import/src/import-fuchsia.cpp +++ b/import/src/import-fuchsia.cpp @@ -20,7 +20,7 @@ #include #include -#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +#if defined _MSC_VER || (defined _WIN32 && defined __GNUC__) // all checked compilers contain _stat64 # define stat64 _stat64 #endif diff --git a/import/src/json.hpp 
b/import/src/json.hpp index cca1a99925..f7aeee4c6d 100644 --- a/import/src/json.hpp +++ b/import/src/json.hpp @@ -16799,7 +16799,7 @@ class binary_writer void write_compact_float(const number_float_t n, detail::input_format_t format) { -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -16819,7 +16819,7 @@ class binary_writer : get_msgpack_float_prefix(n)); write_number(n); } -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic pop #endif } @@ -17981,7 +17981,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '-'; } -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -17993,7 +17993,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '0'; return first; } -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic pop #endif @@ -21187,7 +21187,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::negation>, detail::negation>, detail::negation>>, -#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || defined(__clang__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) +#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) detail::negation>, #endif #if defined(JSON_HAS_CPP_17) && JSON_HAS_STATIC_RTTI @@ -22983,13 +22983,13 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ bool operator==(const_reference rhs) const noexcept { -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif const_reference lhs = *this; JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#if 
defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic pop #endif } @@ -23087,12 +23087,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ friend bool operator==(const_reference lhs, const_reference rhs) noexcept { -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ #pragma GCC diagnostic pop #endif } diff --git a/profiler/src/main.cpp b/profiler/src/main.cpp index 0b6d34cae8..b10a668003 100644 --- a/profiler/src/main.cpp +++ b/profiler/src/main.cpp @@ -19,14 +19,6 @@ # include #endif -#if defined _WIN32 && (defined __GNUC__ && !defined __clang__) -//_WIN32 - only tested on windows - -// gcc throws error for not present std::pow function, -// clang does not have that problem -#include -#endif - #define STB_IMAGE_IMPLEMENTATION #define STBI_ONLY_PNG #include "stb_image.h" diff --git a/profiler/src/profiler/TracyView_Timeline.cpp b/profiler/src/profiler/TracyView_Timeline.cpp index 0e9768beb5..a8c8b9e976 100644 --- a/profiler/src/profiler/TracyView_Timeline.cpp +++ b/profiler/src/profiler/TracyView_Timeline.cpp @@ -10,14 +10,6 @@ #include "TracyTimelineItemThread.hpp" #include "TracyView.hpp" -#if defined _WIN32 && (defined __GNUC__ && !defined __clang__) -//_WIN32 - only tested on windows - -// gcc throws error for not present std::pow function, -// clang does not have that problem -#include -#endif - namespace tracy { diff --git a/profiler/src/stb_image.h b/profiler/src/stb_image.h index d0c064bb36..c7041e6501 100644 --- a/profiler/src/stb_image.h +++ b/profiler/src/stb_image.h @@ -1,4 +1,4 @@ -/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.29 - public domain image loader - 
http://nothings.org/stb no warranty implied; use at your own risk Do this: @@ -48,6 +48,7 @@ LICENSE RECENT REVISION HISTORY: + 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes @@ -621,7 +622,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #ifndef STBI_NO_THREAD_LOCALS #if defined(__cplusplus) && __cplusplus >= 201103L #define STBI_THREAD_LOCAL thread_local - #elif defined(__GNUC__) && __GNUC__ < 5 + #elif defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__) #define STBI_THREAD_LOCAL __thread #elif defined(_MSC_VER) #define STBI_THREAD_LOCAL __declspec(thread) @@ -658,7 +659,6 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #define STBI_NOTUSED(v) (void)sizeof(v) #endif - #ifdef _MSC_VER #define stbi_lrot(x,y) _lrotl(x,y) #elif defined __clang__ @@ -728,7 +728,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #ifdef _MSC_VER -#if defined _MSC_VER && _MSC_VER >= 1400 // not VC6 +#if _MSC_VER >= 1400 // not VC6 #include // __cpuid static int stbi__cpuid3(void) { @@ -1075,8 +1075,8 @@ static int stbi__addints_valid(int a, int b) return a <= INT_MAX - b; } -// returns 1 if the product of two signed shorts is valid, 0 on overflow. -static int stbi__mul2shorts_valid(short a, short b) +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. 
+static int stbi__mul2shorts_valid(int a, int b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid @@ -3387,13 +3387,13 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) return 1; } -static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - while (x == 255) { // might be a marker + stbi_uc x = stbi__get8(j->s); + while (x == 0xff) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { @@ -4179,6 +4179,7 @@ typedef struct { stbi_uc *zbuffer, *zbuffer_end; int num_bits; + int hit_zeof_once; stbi__uint32 code_buffer; char *zout; @@ -4245,9 +4246,20 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) int b,s; if (a->num_bits < 16) { if (stbi__zeof(a)) { - return -1; /* report error for unexpected end of data. */ + if (!a->hit_zeof_once) { + // This is the first time we hit eof, insert 16 extra padding btis + // to allow us to keep going; if we actually consume any of them + // though, that is invalid data. This is caught later. + a->hit_zeof_once = 1; + a->num_bits += 16; // add 16 implicit zero bits + } else { + // We already inserted our extra 16 padding bits and are again + // out, this stream is actually prematurely terminated. 
+ return -1; + } + } else { + stbi__fill_bits(a); } - stbi__fill_bits(a); } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { @@ -4312,6 +4324,13 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) int len,dist; if (z == 256) { a->zout = zout; + if (a->hit_zeof_once && a->num_bits < 16) { + // The first time we hit zeof, we inserted 16 extra zero bits into our bit + // buffer so the decoder can just do its speculative decoding. But if we + // actually consumed any of those bits (which is the case when num_bits < 16), + // the stream actually read past the end so it is malformed. + return stbi__err("unexpected end","Corrupt PNG"); + } return 1; } if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data @@ -4323,7 +4342,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (zout + len > a->zout_end) { + if (len > a->zout_end - zout) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } @@ -4467,6 +4486,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; + a->hit_zeof_once = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); @@ -4622,9 +4642,8 @@ enum { STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, - // synthetic filters used for first scanline to avoid needing a dummy row of 0s - STBI__F_avg_first, - STBI__F_paeth_first + // synthetic filter used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first }; static stbi_uc first_row_filter[5] = @@ -4633,29 +4652,56 @@ static stbi_uc first_row_filter[5] = STBI__F_sub, STBI__F_none, STBI__F_avg_first, - STBI__F_paeth_first + STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub }; static int 
stbi__paeth(int a, int b, int c) { - int p = a + b - c; - int pa = abs(p-a); - int pb = abs(p-b); - int pc = abs(p-c); - if (pa <= pb && pa <= pc) return a; - if (pb <= pc) return b; - return c; + // This formulation looks very different from the reference in the PNG spec, but is + // actually equivalent and has favorable data dependencies and admits straightforward + // generation of branch-free code, which helps performance significantly. + int thresh = c*3 - (a + b); + int lo = a < b ? a : b; + int hi = a < b ? b : a; + int t0 = (hi <= thresh) ? lo : c; + int t1 = (thresh <= lo) ? hi : t0; + return t1; } static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) +{ + int i; + // must process data backwards since we allow dest==src + if (img_n == 1) { + for (i=x-1; i >= 0; --i) { + dest[i*2+1] = 255; + dest[i*2+0] = src[i]; + } + } else { + STBI_ASSERT(img_n == 3); + for (i=x-1; i >= 0; --i) { + dest[i*4+3] = 255; + dest[i*4+2] = src[i*3+2]; + dest[i*4+1] = src[i*3+1]; + dest[i*4+0] = src[i*3+0]; + } + } +} + // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) { - int bytes = (depth == 16? 2 : 1); + int bytes = (depth == 16 ? 
2 : 1); stbi__context *s = a->s; stbi__uint32 i,j,stride = x*out_n*bytes; stbi__uint32 img_len, img_width_bytes; + stbi_uc *filter_buf; + int all_ok = 1; int k; int img_n = s->img_n; // copy it into a local for later @@ -4667,8 +4713,11 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into if (!a->out) return stbi__err("outofmem", "Out of memory"); + // note: error exits here don't need to clean up a->out individually, + // stbi__do_png always does on error. if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); + if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); img_len = (img_width_bytes + 1) * y; // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, @@ -4676,189 +4725,137 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r // so just check for raw_len < img_len always. if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + // Allocate two scan lines worth of filter workspace buffer. 
+ filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); + if (!filter_buf) return stbi__err("outofmem", "Out of memory"); + + // Filtering for low-bit-depth images + if (depth < 8) { + filter_bytes = 1; + width = img_width_bytes; + } + for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *prior; + // cur/prior filter buffers alternate + stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; + stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; + stbi_uc *dest = a->out + stride*j; + int nk = width * filter_bytes; int filter = *raw++; - if (filter > 4) - return stbi__err("invalid filter","Corrupt PNG"); - - if (depth < 8) { - if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); - cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place - filter_bytes = 1; - width = img_width_bytes; + // check filter type + if (filter > 4) { + all_ok = stbi__err("invalid filter","Corrupt PNG"); + break; } - prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; - // handle first byte explicitly - for (k=0; k < filter_bytes; ++k) { - switch (filter) { - case STBI__F_none : cur[k] = raw[k]; break; - case STBI__F_sub : cur[k] = raw[k]; break; - case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; - case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; - case STBI__F_avg_first : cur[k] = raw[k]; break; - case STBI__F_paeth_first: cur[k] = raw[k]; break; - } - } - - if (depth == 8) { - if (img_n != out_n) - cur[img_n] = 255; // first pixel - raw += img_n; - cur += out_n; - prior += out_n; - } else if (depth == 16) { - if (img_n != out_n) { - cur[filter_bytes] = 255; // first pixel top byte - cur[filter_bytes+1] = 
255; // first pixel bottom byte - } - raw += filter_bytes; - cur += output_bytes; - prior += output_bytes; - } else { - raw += 1; - cur += 1; - prior += 1; + // perform actual filtering + switch (filter) { + case STBI__F_none: + memcpy(cur, raw, nk); + break; + case STBI__F_sub: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); + break; + case STBI__F_up: + for (k = 0; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); + break; + case STBI__F_avg: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); + break; + case STBI__F_paeth: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); + break; + case STBI__F_avg_first: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); + break; } - // this is a little gross, so that we don't switch per-pixel or per-component - if (depth < 8 || img_n == out_n) { - int nk = (width - 1)*filter_bytes; - #define STBI__CASE(f) \ - case f: \ - for (k=0; k < nk; ++k) - switch (filter) { - // "none" filter turns into a memcpy here; make that explicit. 
- case STBI__F_none: memcpy(cur, raw, nk); break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; - } - #undef STBI__CASE - raw += nk; - } else { - STBI_ASSERT(img_n+1 == out_n); - #define STBI__CASE(f) \ - case f: \ - for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ - for (k=0; k < filter_bytes; ++k) - switch (filter) { - STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; - } - #undef STBI__CASE - - // the loop above sets the high byte of the pixels' alpha, but for - // 16 bit png files we also need the low byte set. we'll do that here. 
- if (depth == 16) { - cur = a->out + stride*j; // start at the beginning of the row again - for (i=0; i < x; ++i,cur+=output_bytes) { - cur[filter_bytes+1] = 255; - } - } - } - } + raw += nk; - // we make a separate pass to expand bits to pixels; for performance, - // this could run two scanlines behind the above code, so it won't - // intefere with filtering but will still be in the cache. - if (depth < 8) { - for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; - // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit - // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + // expand decoded bits in cur to dest, also adding an extra alpha channel if desired + if (depth < 8) { stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + stbi_uc *in = cur; + stbi_uc *out = dest; + stbi_uc inb = 0; + stbi__uint32 nsmp = x*img_n; - // note that the final byte might overshoot and write more data than desired. - // we can allocate enough data that this never writes out of memory, but it - // could also overwrite the next scanline. can it overwrite non-empty data - // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
- // so we need to explicitly clamp the final ones - + // expand bits to bytes first if (depth == 4) { - for (k=x*img_n; k >= 2; k-=2, ++in) { - *cur++ = scale * ((*in >> 4) ); - *cur++ = scale * ((*in ) & 0x0f); + for (i=0; i < nsmp; ++i) { + if ((i & 1) == 0) inb = *in++; + *out++ = scale * (inb >> 4); + inb <<= 4; } - if (k > 0) *cur++ = scale * ((*in >> 4) ); } else if (depth == 2) { - for (k=x*img_n; k >= 4; k-=4, ++in) { - *cur++ = scale * ((*in >> 6) ); - *cur++ = scale * ((*in >> 4) & 0x03); - *cur++ = scale * ((*in >> 2) & 0x03); - *cur++ = scale * ((*in ) & 0x03); + for (i=0; i < nsmp; ++i) { + if ((i & 3) == 0) inb = *in++; + *out++ = scale * (inb >> 6); + inb <<= 2; } - if (k > 0) *cur++ = scale * ((*in >> 6) ); - if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); - if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); - } else if (depth == 1) { - for (k=x*img_n; k >= 8; k-=8, ++in) { - *cur++ = scale * ((*in >> 7) ); - *cur++ = scale * ((*in >> 6) & 0x01); - *cur++ = scale * ((*in >> 5) & 0x01); - *cur++ = scale * ((*in >> 4) & 0x01); - *cur++ = scale * ((*in >> 3) & 0x01); - *cur++ = scale * ((*in >> 2) & 0x01); - *cur++ = scale * ((*in >> 1) & 0x01); - *cur++ = scale * ((*in ) & 0x01); + } else { + STBI_ASSERT(depth == 1); + for (i=0; i < nsmp; ++i) { + if ((i & 7) == 0) inb = *in++; + *out++ = scale * (inb >> 7); + inb <<= 1; } - if (k > 0) *cur++ = scale * ((*in >> 7) ); - if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); - if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); - if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); - if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); - if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); - if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } - if (img_n != out_n) { - int q; - // insert alpha = 255 - cur = a->out + stride*j; + + // insert alpha=255 values if desired + if (img_n != out_n) + stbi__create_png_alpha_expand8(dest, dest, x, img_n); + } else if (depth == 8) { + if (img_n == out_n) + memcpy(dest, cur, x*img_n); + 
else + stbi__create_png_alpha_expand8(dest, cur, x, img_n); + } else if (depth == 16) { + // convert the image data from big-endian to platform-native + stbi__uint16 *dest16 = (stbi__uint16*)dest; + stbi__uint32 nsmp = x*img_n; + + if (img_n == out_n) { + for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) + *dest16 = (cur[0] << 8) | cur[1]; + } else { + STBI_ASSERT(img_n+1 == out_n); if (img_n == 1) { - for (q=x-1; q >= 0; --q) { - cur[q*2+1] = 255; - cur[q*2+0] = cur[q]; + for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = 0xffff; } } else { STBI_ASSERT(img_n == 3); - for (q=x-1; q >= 0; --q) { - cur[q*4+3] = 255; - cur[q*4+2] = cur[q*3+2]; - cur[q*4+1] = cur[q*3+1]; - cur[q*4+0] = cur[q*3+0]; + for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = (cur[2] << 8) | cur[3]; + dest16[2] = (cur[4] << 8) | cur[5]; + dest16[3] = 0xffff; } } } } - } else if (depth == 16) { - // force the image data from big-endian to platform-native. - // this is done in a separate pass due to the decoding relying - // on the data being untouched, but could probably be done - // per-line during decode if care is taken. 
- stbi_uc *cur = a->out; - stbi__uint16 *cur16 = (stbi__uint16*)cur; - - for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { - *cur16 = (cur[0] << 8) | cur[1]; - } } + STBI_FREE(filter_buf); + if (!all_ok) return 0; + return 1; } diff --git a/public/TracyClient.cpp b/public/TracyClient.cpp index 69a5a6df70..e50f1abaa3 100644 --- a/public/TracyClient.cpp +++ b/public/TracyClient.cpp @@ -51,7 +51,7 @@ #endif #ifdef _MSC_VER -// when gcc and clang linker options will be used +// for gcc and clang added with linker options # pragma comment(lib, "ws2_32.lib") # pragma comment(lib, "dbghelp.lib") # pragma comment(lib, "advapi32.lib") diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 0b2f610ba2..336e99b46d 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -103,7 +103,7 @@ # define TRACY_DELAYED_INIT # endif #else -# if defined __GNUC__ || defined __clang__ +# if defined __GNUC__ # define init_order( val ) __attribute__ ((init_priority(val))) # else # define init_order(x) @@ -117,7 +117,7 @@ extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PR extern "C" typedef char* (WINAPI *t_WineGetVersion)(); extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); -# if defined __clang__ || defined __GNUC__ +# if defined __GNUC__ // _WIN32 # include #endif @@ -1498,7 +1498,7 @@ Profiler::Profiler() m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); -#if defined _WIN32 && (defined __clang__ || defined __GNUC__) +#if defined _WIN32 && defined __clang__ m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; @@ -1692,7 +1692,7 @@ Profiler::~Profiler() #ifndef _WIN32 close( m_pipe[0] ); close( m_pipe[1] ); -#elif defined __clang__ || defined __GNUC__ +#elif defined __GNUC__ // _WIN32 _close(m_pipe[0]); _close(m_pipe[1]); @@ -5083,4 +5083,4 @@ TRACY_API int32_t ___tracy_profiler_started( void ) } #endif -#endif +#endif \ No newline at end of file diff --git a/public/client/TracyProfiler.hpp 
b/public/client/TracyProfiler.hpp index 368889763c..7326af0462 100644 --- a/public/client/TracyProfiler.hpp +++ b/public/client/TracyProfiler.hpp @@ -1067,7 +1067,7 @@ class Profiler #if defined _WIN32 void* m_prevHandler; - #if defined __clang__ || defined __GNUC__ + #if defined __GNUC__ int m_pipe[2]; int m_pipeBufSize; #endif diff --git a/public/client/tracy_concurrentqueue.h b/public/client/tracy_concurrentqueue.h index 693d3b2c6b..4178d39ead 100644 --- a/public/client/tracy_concurrentqueue.h +++ b/public/client/tracy_concurrentqueue.h @@ -34,7 +34,7 @@ #include "../common/TracyForceInline.hpp" #include "../common/TracySystem.hpp" -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings // upon assigning any computed values) @@ -64,7 +64,7 @@ namespace tracy // Compiler-specific likely/unlikely hints namespace moodycamel { namespace details { -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) inline bool cqLikely(bool x) { return __builtin_expect((x), true); } inline bool cqUnlikely(bool x) { return __builtin_expect((x), false); } #else @@ -1436,6 +1436,6 @@ inline void swap(ConsumerToken& a, ConsumerToken& b) noexcept } /* namespace tracy */ -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) #pragma GCC diagnostic pop #endif diff --git a/public/common/TracyForceInline.hpp b/public/common/TracyForceInline.hpp index 3803e96e0b..b6a5833e58 100644 --- a/public/common/TracyForceInline.hpp +++ b/public/common/TracyForceInline.hpp @@ -1,7 +1,7 @@ #ifndef __TRACYFORCEINLINE_HPP__ #define __TRACYFORCEINLINE_HPP__ -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) # define tracy_force_inline __attribute__((always_inline)) inline #elif defined(_MSC_VER) # define tracy_force_inline __forceinline @@ -9,7 +9,7 @@ # define tracy_force_inline inline 
#endif -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) # define tracy_no_inline __attribute__((noinline)) #elif defined(_MSC_VER) # define tracy_no_inline __declspec(noinline) diff --git a/public/common/tracy_lz4.cpp b/public/common/tracy_lz4.cpp index 7e78da9ddf..bb032bfc73 100644 --- a/public/common/tracy_lz4.cpp +++ b/public/common/tracy_lz4.cpp @@ -74,11 +74,11 @@ * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ -# if (defined(__GNUC__) || defined(__clang__)) && \ +# if defined(__GNUC__) && \ ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define LZ4_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || (defined(__GNUC__) || defined(__clang__)) +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) # define LZ4_FORCE_MEMORY_ACCESS 1 # endif #endif diff --git a/server/TracyFileRead.hpp b/server/TracyFileRead.hpp index 10c62b06f1..5404ee342d 100644 --- a/server/TracyFileRead.hpp +++ b/server/TracyFileRead.hpp @@ -15,7 +15,7 @@ #include #include -#if defined _MSC_VER || defined __clang__ || defined __GNUC__ +#if defined _MSC_VER || (defined _WIN32 && defined __GNUC__) // MSCV, gcc and clang compilers contain _stat64 # define stat64 _stat64 #endif diff --git a/server/TracyPopcnt.hpp b/server/TracyPopcnt.hpp index 945324fd91..3aa8308680 100644 --- a/server/TracyPopcnt.hpp +++ b/server/TracyPopcnt.hpp @@ -8,7 +8,7 @@ # include # define TracyCountBits __popcnt64 # define TracyLzcnt __lzcnt64 -#elif defined __GNUC__ || defined __clang__ +#elif defined __GNUC__ static inline uint64_t TracyCountBits( uint64_t i ) { return uint64_t( __builtin_popcountll( i ) ); diff --git a/server/TracyPrint.cpp b/server/TracyPrint.cpp index 95029f557e..8d3606ba4f 100644 --- a/server/TracyPrint.cpp +++ b/server/TracyPrint.cpp 
@@ -1,7 +1,7 @@ #ifdef _MSC_VER # pragma warning( disable: 4244 ) // conversion from don't care to whatever, possible loss of data #endif -#if defined __MINGW32__ || defined __GNUC__ || defined __clang__ +#if defined __MINGW32__ || defined __GNUC__ # define __STDC_FORMAT_MACROS #endif diff --git a/server/TracyPrint.hpp b/server/TracyPrint.hpp index ed9b62bb8a..80b087e46f 100644 --- a/server/TracyPrint.hpp +++ b/server/TracyPrint.hpp @@ -16,7 +16,7 @@ # define NO_CHARCONV #endif -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ # define NO_CHARCONV #endif diff --git a/server/tracy_xxhash.h b/server/tracy_xxhash.h index 0b0f8d57a9..02438fcb73 100644 --- a/server/tracy_xxhash.h +++ b/server/tracy_xxhash.h @@ -260,7 +260,7 @@ extern "C" { # define XXH_STATIC_LINKING_ONLY /* make all functions private */ # undef XXH_PUBLIC_API -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define XXH_PUBLIC_API static inline @@ -373,15 +373,15 @@ extern "C" { /*! @brief Marks a global symbol. 
*/ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && (defined(_MSC_VER) || defined(__clang__) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# if defined(WIN32) && (defined(_MSC_VER) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # __attribute__((dllexport)) # elif # define XXH_PUBLIC_API __declspec(dllexport) # endif # elif XXH_IMPORT -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # __attribute__((dllimport)) # elif # define XXH_PUBLIC_API __declspec(dllimport) @@ -457,15 +457,15 @@ extern "C" { /* specific declaration modes for Windows */ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && (defined(_MSC_VER) || defined(__clang__) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# if defined(WIN32) && (defined(_MSC_VER) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # __attribute__((dllexport)) # elif # define XXH_PUBLIC_API __declspec(dllexport) # endif # elif XXH_IMPORT -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) # __attribute__((dllimport)) # elif # define XXH_PUBLIC_API __declspec(dllimport) @@ -476,7 +476,7 @@ extern "C" { # endif #endif -#if defined (__GNUC__) || defined (__clang__) +#if defined (__GNUC__) # define XXH_CONSTF __attribute__((const)) # define XXH_PUREF __attribute__((pure)) # define XXH_MALLOCF __attribute__((malloc)) @@ -1494,7 +1494,7 @@ struct XXH64_state_s { #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */ /* In C++ alignas() is a keyword */ # define XXH_ALIGN(n) alignas(n) -#elif defined(__GNUC__) || defined(__clang__) +#elif defined(__GNUC__) # define XXH_ALIGN(n) __attribute__ ((aligned(n))) #elif defined(_MSC_VER) # define XXH_ALIGN(n) 
__declspec(align(n)) @@ -1505,7 +1505,7 @@ struct XXH64_state_s { /* Old GCC versions only accept the attribute after the type in structures. */ #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \ - && (defined(__GNUC__) || defined(__clang__)) + && defined(__GNUC__) # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) #else # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type @@ -1999,7 +1999,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, /* prefer __packed__ structures (method 1) for GCC * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy * which for some reason does unaligned loads. */ -# if (defined(__GNUC__) || defined(__clang__)) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) +# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif diff --git a/test/stb_image.h b/test/stb_image.h index 4c03e4a8d1..7a8b294fcd 100644 --- a/test/stb_image.h +++ b/test/stb_image.h @@ -728,7 +728,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; #ifdef _MSC_VER -#if defined (_MSC_VER && _MSC_VER >= 1400) // not VC6 +#if _MSC_VER >= 1400 // not VC6 #include // __cpuid static int stbi__cpuid3(void) { From 7303aa0fe7edacb876cbce8732ebfd6f39170807 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 23:01:08 +0200 Subject: [PATCH 11/15] config.cmake fix --- cmake/config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 1bdb25905e..4874684224 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -47,7 +47,7 @@ message("compiler = ${CMAKE_C_COMPILER_ID}") if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) # gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp - if(WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) + if(NOT WIN32 OR (WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"))) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) endif() endif() From 070b10e8d58721288418b0ee72e22ae9bb66e707 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 8 Jun 2025 23:23:16 +0200 Subject: [PATCH 12/15] Removal of test line of code --- cmake/config.cmake | 2 -- 1 file changed, 2 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 4874684224..ec655cbcff 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -43,8 +43,6 @@ if(EMSCRIPTEN) add_compile_options(-pthread -DIMGUI_IMPL_OPENGL_ES2) endif() -message("compiler = ${CMAKE_C_COMPILER_ID}") - if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN) # gcc on windows can't handle section count resulting during compilation of profiler/src/profiler/TracyMicroArchitecture.cpp if(NOT WIN32 OR (WIN32 AND NOT ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"))) From 5ce4d5e4283179f54cd89d9cc1787ab3af7e1ea3 Mon Sep 17 00:00:00 2001 From: Filip <> Date: 
Sun, 22 Jun 2025 18:57:25 +0200 Subject: [PATCH 13/15] Fixed line endings in TracyProfiler.cpp and rebase --- public/client/TracyProfiler.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 336e99b46d..3b512e3949 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -1498,7 +1498,7 @@ Profiler::Profiler() m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); -#if defined _WIN32 && defined __clang__ +#if defined _WIN32 && defined __GNUC__ m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; @@ -3181,6 +3181,7 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); +<<<<<<< HEAD #ifdef _WIN32 # ifdef _MSC_VER @@ -3221,8 +3222,11 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) } offset += result1; } +<<<<<<< HEAD # endif +======= +>>>>>>> 87316f28 (Fixed line endings in TracyProfiler.cpp) #else // Send through the pipe to ensure safe reads for( size_t offset = 0; offset != size; /*in loop*/ ) @@ -5083,4 +5087,8 @@ TRACY_API int32_t ___tracy_profiler_started( void ) } #endif -#endif \ No newline at end of file +<<<<<<< HEAD +#endif +======= +#endif +>>>>>>> 87316f28 (Fixed line endings in TracyProfiler.cpp) From 592b90b3dc6c6bc73367b60c80be53a9b22348f6 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Sun, 22 Jun 2025 20:23:37 +0200 Subject: [PATCH 14/15] Deleted leftover rebase text --- public/client/TracyProfiler.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index 3b512e3949..ddab9bd83e 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -3181,7 +3181,6 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); -<<<<<<< HEAD #ifdef _WIN32 # ifdef 
_MSC_VER @@ -3222,11 +3221,8 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) } offset += result1; } -<<<<<<< HEAD # endif -======= ->>>>>>> 87316f28 (Fixed line endings in TracyProfiler.cpp) #else // Send through the pipe to ensure safe reads for( size_t offset = 0; offset != size; /*in loop*/ ) @@ -5087,8 +5083,4 @@ TRACY_API int32_t ___tracy_profiler_started( void ) } #endif -<<<<<<< HEAD #endif -======= -#endif ->>>>>>> 87316f28 (Fixed line endings in TracyProfiler.cpp) From 8efc8659db343f1c955e7d471be584fcc63133e2 Mon Sep 17 00:00:00 2001 From: Filip <> Date: Wed, 25 Jun 2025 14:55:51 +0200 Subject: [PATCH 15/15] cleanup for master branch --- import/src/json.hpp | 18 +++--- profiler/src/profiler/TracyView_FindZone.cpp | 1 + profiler/src/stb_image.h | 8 +-- public/client/TracyProfiler.cpp | 68 +++++++++----------- public/common/tracy_lz4.cpp | 6 +- server/TracyPopcnt.hpp | 2 +- server/TracyPrint.cpp | 2 +- server/TracyPrint.hpp | 2 +- server/tracy_xxhash.h | 14 ++-- test/stb_image.h | 12 ++-- 10 files changed, 59 insertions(+), 74 deletions(-) diff --git a/import/src/json.hpp b/import/src/json.hpp index f7aeee4c6d..8b72ea6539 100644 --- a/import/src/json.hpp +++ b/import/src/json.hpp @@ -16799,7 +16799,7 @@ class binary_writer void write_compact_float(const number_float_t n, detail::input_format_t format) { -#if defined __GNUC__ +#ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -16819,7 +16819,7 @@ class binary_writer : get_msgpack_float_prefix(n)); write_number(n); } -#if defined __GNUC__ +#ifdef __GNUC__ #pragma GCC diagnostic pop #endif } @@ -17981,7 +17981,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '-'; } -#if defined __GNUC__ +#ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif @@ -17993,7 +17993,7 @@ char* to_chars(char* first, const char* last, FloatType value) *first++ = '0'; return first; } -#if defined 
__GNUC__ +#ifdef __GNUC__ #pragma GCC diagnostic pop #endif @@ -21187,7 +21187,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::negation>, detail::negation>, detail::negation>>, -#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) +#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) detail::negation>, #endif #if defined(JSON_HAS_CPP_17) && JSON_HAS_STATIC_RTTI @@ -22983,13 +22983,13 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ bool operator==(const_reference rhs) const noexcept { -#if defined __GNUC__ +#ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif const_reference lhs = *this; JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#if defined __GNUC__ +#ifdef __GNUC__ #pragma GCC diagnostic pop #endif } @@ -23087,12 +23087,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/operator_eq/ friend bool operator==(const_reference lhs, const_reference rhs) noexcept { -#if defined __GNUC__ +#ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif JSON_IMPLEMENT_OPERATOR( ==, true, false, false) -#if defined __GNUC__ +#ifdef __GNUC__ #pragma GCC diagnostic pop #endif } diff --git a/profiler/src/profiler/TracyView_FindZone.cpp b/profiler/src/profiler/TracyView_FindZone.cpp index f6cf6ee95a..c57bfe3058 100644 --- a/profiler/src/profiler/TracyView_FindZone.cpp +++ b/profiler/src/profiler/TracyView_FindZone.cpp @@ -307,6 +307,7 @@ void View::DrawFindZone() if( ImGui::Button( ICON_FA_BAN " Clear" ) ) { + m_findZone.pattern[0] = '\0'; m_findZone.Reset(); } ImGui::SameLine(); diff --git a/profiler/src/stb_image.h b/profiler/src/stb_image.h 
index c7041e6501..aadab972b1 100644 --- a/profiler/src/stb_image.h +++ b/profiler/src/stb_image.h @@ -660,15 +660,13 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #endif #ifdef _MSC_VER - #define stbi_lrot(x,y) _lrotl(x,y) + #define stbi_lrot(x,y) _lrotl(x,y) #elif defined __clang__ // 32bit version of function as stb image uses this function to rotate 32bit integers #define stbi_lrot(x,y) __builtin_rotateleft32(x,y) -#elif defined __GNUC__ - // gcc built-in is type-generic with first argument being any unsigned integer and second any signed or unsigned integer or char - #define stbi_lrot(x,y) __builtin_stdc_rotate_left(x,y) #else - #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) + // gcc does not provide builtin rotate left function for C++ (__builtin_stdc_rotate_left is available only in C) + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index ddab9bd83e..1e6c8c6345 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -103,7 +103,7 @@ # define TRACY_DELAYED_INIT # endif #else -# if defined __GNUC__ +# ifdef __GNUC__ # define init_order( val ) __attribute__ ((init_priority(val))) # else # define init_order(x) @@ -302,10 +302,6 @@ static bool EnsureReadable( uintptr_t address ) } #endif -#if defined __linux__ - bool -#endif - #ifndef TRACY_DELAYED_INIT struct InitTimeWrapper @@ -1464,12 +1460,6 @@ Profiler::Profiler() s_token_detail = moodycamel::ProducerToken( s_queue ); s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; s_threadHandle = ThreadHandleWrapper { m_mainThread }; -# else - //#error FilipNur check if works - // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. 
- s_token_detail = moodycamel::ProducerToken( s_queue ); - s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; - s_threadHandle = ThreadHandleWrapper { m_mainThread }; # endif #endif @@ -1504,11 +1494,11 @@ Profiler::Profiler() { // scope for temporary variable originalHandlesCount int originalHandlesCount = _getmaxstdio(); - + while(_pipe(m_pipe, m_pipeBufSize, _O_BINARY) != 0) { if ((errno == EMFILE) || (errno == ENFILE)) - { + { // safe upper bound for exceptional situations if(_getmaxstdio() > (originalHandlesCount + 10)) { @@ -3560,32 +3550,32 @@ void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) case SymbolQueueItemType::KernelCode: { #ifdef _WIN32 - auto mod = GetKernelModulePath( si.ptr ); - if( mod ) - { - auto fn = DecodeCallstackPtrFast( si.ptr ); - if( *fn ) - { - auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); - if( hnd ) - { - auto ptr = (const void*)GetProcAddress( hnd, fn ); - if( ptr ) - { - auto buf = (char*)tracy_malloc( si.extra ); - memcpy( buf, ptr, si.extra ); - FreeLibrary( hnd ); - TracyLfqPrepare( QueueType::SymbolCodeMetadata ); - MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); - MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); - MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); - TracyLfqCommit; - break; - } - FreeLibrary( hnd ); - } - } - } + auto mod = GetKernelModulePath( si.ptr ); + if( mod ) + { + auto fn = DecodeCallstackPtrFast( si.ptr ); + if( *fn ) + { + auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); + if( hnd ) + { + auto ptr = (const void*)GetProcAddress( hnd, fn ); + if( ptr ) + { + auto buf = (char*)tracy_malloc( si.extra ); + memcpy( buf, ptr, si.extra ); + FreeLibrary( hnd ); + TracyLfqPrepare( QueueType::SymbolCodeMetadata ); + MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); + MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); + MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); 
+ TracyLfqCommit; + break; + } + FreeLibrary( hnd ); + } + } + } #elif defined __linux__ void* data = m_kcore->Retrieve( si.ptr, si.extra ); if( data ) diff --git a/public/common/tracy_lz4.cpp b/public/common/tracy_lz4.cpp index bb032bfc73..cff579c57b 100644 --- a/public/common/tracy_lz4.cpp +++ b/public/common/tracy_lz4.cpp @@ -345,9 +345,7 @@ namespace tracy * environments. This is needed when decompressing the Linux Kernel, for example. */ #if !defined(LZ4_memcpy) -# if defined(__clang__) -# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) -# elif defined(__GNUC__) && (__GNUC__ >= 4) +# if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) # define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) # else # define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) @@ -1285,7 +1283,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( } else { *op++ = (BYTE)(lastRun< # define TracyCountBits __popcnt64 # define TracyLzcnt __lzcnt64 -#elif defined __GNUC__ +#elif defined __GNUC__ || defined __clang__ static inline uint64_t TracyCountBits( uint64_t i ) { return uint64_t( __builtin_popcountll( i ) ); diff --git a/server/TracyPrint.cpp b/server/TracyPrint.cpp index 8d3606ba4f..9111ddb0bd 100644 --- a/server/TracyPrint.cpp +++ b/server/TracyPrint.cpp @@ -1,7 +1,7 @@ #ifdef _MSC_VER # pragma warning( disable: 4244 ) // conversion from don't care to whatever, possible loss of data #endif -#if defined __MINGW32__ || defined __GNUC__ +#ifdef __MINGW32__ # define __STDC_FORMAT_MACROS #endif diff --git a/server/TracyPrint.hpp b/server/TracyPrint.hpp index 80b087e46f..d38245e359 100644 --- a/server/TracyPrint.hpp +++ b/server/TracyPrint.hpp @@ -16,7 +16,7 @@ # define NO_CHARCONV #endif -#if defined __GNUC__ +#ifdef __GNUC__ # define NO_CHARCONV #endif diff --git a/server/tracy_xxhash.h b/server/tracy_xxhash.h index 02438fcb73..da07976123 100644 --- a/server/tracy_xxhash.h +++ b/server/tracy_xxhash.h @@ -376,14 +376,14 @@ extern "C" { # 
if defined(WIN32) && (defined(_MSC_VER) || defined(__GNUC__)) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) # ifdef XXH_EXPORT # if defined(__GNUC__) -# __attribute__((dllexport)) -# elif +# define XXH_PUBLIC_API __attribute__((dllexport)) +# else # define XXH_PUBLIC_API __declspec(dllexport) # endif # elif XXH_IMPORT # if defined(__GNUC__) -# __attribute__((dllimport)) -# elif +# define XXH_PUBLIC_API __attribute__((dllimport)) +# else # define XXH_PUBLIC_API __declspec(dllimport) # endif # endif @@ -2429,7 +2429,7 @@ static int XXH_isLittleEndian(void) */ #if XXH_HAS_BUILTIN(__builtin_unreachable) -// gcc and clang should have this as builtin +// gcc and clang support this builtin # define XXH_UNREACHABLE() __builtin_unreachable() #elif defined(_MSC_VER) @@ -2460,8 +2460,8 @@ static int XXH_isLittleEndian(void) */ #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ && XXH_HAS_BUILTIN(__builtin_rotateleft64) -# define XXH_rotl32(x,r) __builtin_rotateleft32(x,r) -# define XXH_rotl64(x,r) __builtin_rotateleft64(x,r) +# define XXH_rotl32 __builtin_rotateleft32 +# define XXH_rotl64 __builtin_rotateleft64 /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ #elif defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) diff --git a/test/stb_image.h b/test/stb_image.h index 7a8b294fcd..d2e3ba74d3 100644 --- a/test/stb_image.h +++ b/test/stb_image.h @@ -621,7 +621,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #ifndef STBI_NO_THREAD_LOCALS #if defined(__cplusplus) && __cplusplus >= 201103L #define STBI_THREAD_LOCAL thread_local - #elif defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 5 + #elif defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__) #define STBI_THREAD_LOCAL __thread #elif defined(_MSC_VER) #define STBI_THREAD_LOCAL __declspec(thread) @@ -660,15 +660,13 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; #ifdef _MSC_VER - #define stbi_lrot(x,y) _lrotl(x,y) + #define stbi_lrot(x,y) _lrotl(x,y) #elif defined __clang__ // 32bit version of function as stb image uses this function to rotate 32bit integers #define stbi_lrot(x,y) __builtin_rotateleft32(x,y) -#elif defined __GNUC__ - // gcc built-in is type-generic with first argument being any unsigned integer and second any signed or unsigned integer or char - #define stbi_lrot(x,y) __builtin_stdc_rotate_left(x,y) #else - #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) + // gcc does not provide builtin rotate left function for C++ (__builtin_stdc_rotate_left is available only in C) + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) @@ -728,7 +726,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #ifdef _MSC_VER -#if _MSC_VER >= 1400 // not VC6 +#if _MSC_VER >= 1400 // not VC6 #include <intrin.h> // __cpuid static int stbi__cpuid3(void) {