From cf9b249ff0fee1e0f8da5732eca103052b1874a6 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 2 Jun 2026 07:49:58 +0200 Subject: [PATCH 1/3] fix(spinlock): bound shared lock acquisition in signal handler paths Replace OptionalSharedLockGuard's unbounded tryLockShared spin with a CAS-retry budget (DEFAULT_SHARED_SPIN_BUDGET=256). Under heavy reader contention the guard now returns false after exhausting the budget rather than spinning indefinitely inside a signal handler. Adds debug-mode assertions to unlock()/unlockShared(), a new JVMTI_STACKS_DROPPED_REC_LOCK observability counter, and a gtest suite for the new behaviour. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- ddprof-lib/src/main/cpp/counters.h | 5 +- ddprof-lib/src/main/cpp/flightRecorder.cpp | 2 + ddprof-lib/src/main/cpp/spinLock.h | 46 ++++++-- ddprof-lib/src/test/cpp/spinLock_ut.cpp | 116 +++++++++++++++++++++ 4 files changed, 162 insertions(+), 7 deletions(-) create mode 100644 ddprof-lib/src/test/cpp/spinLock_ut.cpp diff --git a/ddprof-lib/src/main/cpp/counters.h b/ddprof-lib/src/main/cpp/counters.h index 70b551737..b1cb724b7 100644 --- a/ddprof-lib/src/main/cpp/counters.h +++ b/ddprof-lib/src/main/cpp/counters.h @@ -114,7 +114,10 @@ X(JVMTI_STACKS_REQUESTED, "jvmti_stacks_requested") \ X(JVMTI_STACKS_FAILED_WRONG_PHASE, "jvmti_stacks_failed_wrong_phase") \ X(JVMTI_STACKS_FAILED_OTHER, "jvmti_stacks_failed_other") \ - X(JVMTI_STACKS_DROPPED_LOCK, "jvmti_stacks_dropped_lock") + /* Sequential drop layers for delegated stacks: slot-lock first, then \ + * rec-lock; both counted against a single JVMTI_STACKS_REQUESTED. */ \ + X(JVMTI_STACKS_DROPPED_LOCK, "jvmti_stacks_dropped_lock") \ + X(JVMTI_STACKS_DROPPED_REC_LOCK, "jvmti_stacks_dropped_rec_lock") #define X_ENUM(a, b) a, typedef enum CounterId : int { DD_COUNTER_TABLE(X_ENUM) DD_NUM_COUNTERS diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 3b788efc1..04b53e4bc 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -2111,6 +2111,8 @@ void FlightRecorder::recordEventDelegated(int lock_index, int tid, rec->flushIfNeeded(buf); rec->addThread(lock_index, tid); } + } else { + Counters::increment(JVMTI_STACKS_DROPPED_REC_LOCK); } } diff --git a/ddprof-lib/src/main/cpp/spinLock.h b/ddprof-lib/src/main/cpp/spinLock.h index f1e825e30..3f94b6cfd 100644 --- a/ddprof-lib/src/main/cpp/spinLock.h +++ b/ddprof-lib/src/main/cpp/spinLock.h @@ -18,6 +18,7 @@ #define _SPINLOCK_H #include "arch.h" +#include // Cannot use regular mutexes inside signal handler. // This lock is based on CAS busy loop. GCC atomic builtins imply full barrier. @@ -44,7 +45,15 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock { } } - void unlock() { __sync_fetch_and_sub(&_lock, 1); } + void unlock() { + assert(_lock == 1); + __sync_fetch_and_sub(&_lock, 1); + } + + // Spin budget for bounded shared acquisition in signal handlers. + // Bounds the number of CAS-retry iterations under reader contention; + // does NOT bound wall-clock latency (CAS stall time is hardware-dependent). + static constexpr int DEFAULT_SHARED_SPIN_BUDGET = 256; bool tryLockShared() { // Spins while no exclusive lock is held and the CAS to acquire a shared @@ -60,6 +69,23 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock { return false; } + // Bounded variant for signal-handler paths. Returns false when an exclusive + // lock is observed OR the spin budget is exhausted under reader contention. + bool tryLockShared(int max_spins) { + int value; + int spins = 0; + while ((value = __atomic_load_n(&_lock, __ATOMIC_ACQUIRE)) <= 0) { + if (__sync_bool_compare_and_swap(&_lock, value, value - 1)) { + return true; + } + if (++spins >= max_spins) { + return false; + } + spinPause(); + } + return false; + } + void lockShared() { int value; while ((value = __atomic_load_n(&_lock, __ATOMIC_ACQUIRE)) > 0 || @@ -68,7 +94,10 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock { } } - void unlockShared() { __sync_fetch_and_add(&_lock, 1); } + void unlockShared() { + assert(_lock < 0); + __sync_fetch_and_add(&_lock, 1); + } }; // RAII guard classes for automatic lock management @@ -89,12 +118,17 @@ class SharedLockGuard { SharedLockGuard& operator=(SharedLockGuard&&) = delete; }; +// Acquires a shared lock with a bounded CAS-retry budget. Returns without +// acquiring (ownsLock() == false) when an exclusive lock is observed or +// the spin budget is exhausted under reader contention. Safe to use in +// signal-handler paths. class OptionalSharedLockGuard { SpinLock* _lock; public: - explicit OptionalSharedLockGuard(SpinLock* lock) : _lock(lock) { - if (!_lock->tryLockShared()) { - // Exclusive lock is held; no unlock needed. Only fails when an exclusive lock is observed. + explicit OptionalSharedLockGuard( + SpinLock* lock, int max_spins = SpinLock::DEFAULT_SHARED_SPIN_BUDGET) + : _lock(lock) { + if (!_lock->tryLockShared(max_spins)) { _lock = nullptr; } } @@ -103,7 +137,7 @@ class OptionalSharedLockGuard { _lock->unlockShared(); } } - bool ownsLock() { return _lock != nullptr; } + bool ownsLock() const { return _lock != nullptr; } // Non-copyable and non-movable OptionalSharedLockGuard(const OptionalSharedLockGuard&) = delete; diff --git a/ddprof-lib/src/test/cpp/spinLock_ut.cpp b/ddprof-lib/src/test/cpp/spinLock_ut.cpp new file mode 100644 index 000000000..5b7ebceeb --- /dev/null +++ b/ddprof-lib/src/test/cpp/spinLock_ut.cpp @@ -0,0 +1,116 @@ +/* + * Copyright 2026 Datadog, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "spinLock.h" +#include "../../main/cpp/gtest_crash_handler.h" +#include +#include + +static constexpr char SPINLOCK_TEST_NAME[] = "SpinLockTest"; + +class SpinLockTest : public ::testing::Test { +protected: + void SetUp() override { + installGtestCrashHandler(); + } + void TearDown() override { + restoreDefaultSignalHandlers(); + } + + SpinLock lock; +}; + +// OptionalSharedLockGuard acquires on a free lock and releases on destruction. +TEST_F(SpinLockTest, OptionalGuard_UncontendedAcquire) { + { + OptionalSharedLockGuard g(&lock); + EXPECT_TRUE(g.ownsLock()); + } + // After destruction the lock must be back to 0 (unlocked). + // Verify by taking an exclusive lock — would spin forever if still shared. + EXPECT_TRUE(lock.tryLock()); + lock.unlock(); +} + +// When an exclusive lock is held, tryLockShared returns false immediately +// (first load sees _lock == 1 > 0, exits without spinning). +TEST_F(SpinLockTest, OptionalGuard_ExclusiveHeld_ImmediateReturn) { + lock.lock(); + OptionalSharedLockGuard g(&lock, 1000000); + EXPECT_FALSE(g.ownsLock()); + lock.unlock(); +} + +// The spin budget must be honoured: even with a tiny budget the constructor +// returns (does not hang) when readers are continuously racing the CAS. +TEST_F(SpinLockTest, OptionalGuard_SpinBudgetBound) { + // A background contender thread rapidly locks/unlocks shared to create + // CAS contention on _lock; the guard must return without hanging. + std::atomic stop{false}; + + // Background thread hammers shared lock/unlock to create CAS contention. + std::thread contender([&] { + while (!stop.load(std::memory_order_relaxed)) { + lock.lockShared(); + lock.unlockShared(); + } + }); + + // With a very small budget the guard must return promptly, not hang. + for (int i = 0; i < 1000; ++i) { + OptionalSharedLockGuard g(&lock, 8); + // ownsLock() may be true or false depending on timing — we only assert + // the constructor returned (i.e. we reach here without hanging). + (void)g.ownsLock(); + } + + stop.store(true, std::memory_order_relaxed); + contender.join(); +} + +// Verifies that the budget is enforced: with an exclusive lock held, +// tryLockShared(N) must return false regardless of N. +TEST_F(SpinLockTest, OptionalGuard_BudgetEnforced_ExclusivePath) { + lock.lock(); + // With any budget, exclusive lock causes immediate false return. + EXPECT_FALSE(lock.tryLockShared(1)); + EXPECT_FALSE(lock.tryLockShared(1000)); + EXPECT_FALSE(lock.tryLockShared(SpinLock::DEFAULT_SHARED_SPIN_BUDGET)); + lock.unlock(); +} + +// Multiple shared guards can be held simultaneously (readers don't starve each other). +TEST_F(SpinLockTest, OptionalGuard_SharedReentrancy) { + OptionalSharedLockGuard g1(&lock); + OptionalSharedLockGuard g2(&lock); + EXPECT_TRUE(g1.ownsLock()); + EXPECT_TRUE(g2.ownsLock()); + // Exclusive try must fail while shared locks are held. + EXPECT_FALSE(lock.tryLock()); +} + +// tryLockShared() (unbounded) still works correctly alongside the bounded overload. +TEST_F(SpinLockTest, TryLockShared_ExclusiveHeld_ReturnsFalse) { + lock.lock(); + EXPECT_FALSE(lock.tryLockShared()); + lock.unlock(); +} + +TEST_F(SpinLockTest, TryLockShared_Free_ReturnsTrue) { + EXPECT_TRUE(lock.tryLockShared()); + lock.unlockShared(); +} From f3ad4d1b91282243d3e424ca190b6daf1da151ec Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 2 Jun 2026 08:13:55 +0200 Subject: [PATCH 2/3] fix(spinlock): use atomic loads in unlock/unlockShared asserts Fixes TSAN data race: plain volatile reads in assert() raced with concurrent atomic CAS writes from other threads. Use __atomic_load_n (RELAXED) for assert invariant checks and __atomic_store_n for reset(). Co-Authored-By: Claude Sonnet 4.6 --- ddprof-lib/src/main/cpp/spinLock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ddprof-lib/src/main/cpp/spinLock.h b/ddprof-lib/src/main/cpp/spinLock.h index 3f94b6cfd..a8ec1bb72 100644 --- a/ddprof-lib/src/main/cpp/spinLock.h +++ b/ddprof-lib/src/main/cpp/spinLock.h @@ -35,7 +35,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock { static_assert(sizeof(SpinLock) == DEFAULT_CACHE_LINE_SIZE); } - void reset() { _lock = 0; } + void reset() { __atomic_store_n(&_lock, 0, __ATOMIC_RELAXED); } bool tryLock() { return __sync_bool_compare_and_swap(&_lock, 0, 1); } @@ -46,7 +46,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock { } void unlock() { - assert(_lock == 1); + assert(__atomic_load_n(&_lock, __ATOMIC_RELAXED) == 1); __sync_fetch_and_sub(&_lock, 1); } @@ -95,7 +95,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock { } void unlockShared() { - assert(_lock < 0); + assert(__atomic_load_n(&_lock, __ATOMIC_RELAXED) < 0); __sync_fetch_and_add(&_lock, 1); } }; From 444462b66aafaf46f69c3aa3adcd0b4efbb0bd37 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 2 Jun 2026 09:21:55 +0200 Subject: [PATCH 3/3] fix(jfr): count samples dropped when rec_lock acquisition fails Adds SAMPLES_DROPPED_REC_LOCK counter and increments it in the recordEvent() else branch, mirroring what recordEventDelegated() already does for JVMTI_STACKS_DROPPED_REC_LOCK. Co-Authored-By: Claude Sonnet 4.6 --- ddprof-lib/src/main/cpp/counters.h | 3 ++- ddprof-lib/src/main/cpp/flightRecorder.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/counters.h b/ddprof-lib/src/main/cpp/counters.h index b1cb724b7..87afab983 100644 --- a/ddprof-lib/src/main/cpp/counters.h +++ b/ddprof-lib/src/main/cpp/counters.h @@ -117,7 +117,8 @@ /* Sequential drop layers for delegated stacks: slot-lock first, then \ * rec-lock; both counted against a single JVMTI_STACKS_REQUESTED. */ \ X(JVMTI_STACKS_DROPPED_LOCK, "jvmti_stacks_dropped_lock") \ - X(JVMTI_STACKS_DROPPED_REC_LOCK, "jvmti_stacks_dropped_rec_lock") + X(JVMTI_STACKS_DROPPED_REC_LOCK, "jvmti_stacks_dropped_rec_lock") \ + X(SAMPLES_DROPPED_REC_LOCK, "samples_dropped_rec_lock") #define X_ENUM(a, b) a, typedef enum CounterId : int { DD_COUNTER_TABLE(X_ENUM) DD_NUM_COUNTERS diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 04b53e4bc..9166a4a4b 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -2084,6 +2084,8 @@ void FlightRecorder::recordEvent(int lock_index, int tid, u64 call_trace_id, rec->flushIfNeeded(buf); rec->addThread(lock_index, tid); } + } else { + Counters::increment(SAMPLES_DROPPED_REC_LOCK); } }