Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ddprof-lib/src/main/cpp/arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,14 @@ Error Arguments::parse(const char *args) {
_jvmtistacks = true;
}

CASE("wallprecheck")
if (value != NULL) {
_wall_precheck = strcmp(value, "false") != 0 && strcmp(value, "0") != 0;
} else {
// No value means enable
_wall_precheck = true;
}

CASE("wallsampler")
if (value != NULL) {
switch (value[0]) {
Expand Down
2 changes: 2 additions & 0 deletions ddprof-lib/src/main/cpp/arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ class Arguments {
long _cpu;
long _wall;
bool _wall_collapsing;
bool _wall_precheck;
int _wall_threads_per_tick;
WallclockSampler _wallclock_sampler;
long _memory;
Expand Down Expand Up @@ -205,6 +206,7 @@ class Arguments {
_cpu(-1),
_wall(-1),
_wall_collapsing(false),
_wall_precheck(false),
_wall_threads_per_tick(DEFAULT_WALL_THREADS_PER_TICK),
_wallclock_sampler(ASGCT),
_memory(-1),
Expand Down
2 changes: 2 additions & 0 deletions ddprof-lib/src/main/cpp/counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@
X(AGCT_NATIVE_NO_JAVA_CONTEXT, "agct_native_no_java_context") \
X(AGCT_BLOCKED_IN_VM, "agct_blocked_in_vm") \
X(SKIPPED_WALLCLOCK_UNWINDS, "skipped_wallclock_unwinds") \
X(WC_SIGNAL_SUPPRESSED_SAMPLED_RUN, "wc_signals_suppressed_sampled_run") \
X(WC_SIGNAL_QUEUE_FULL, "wc_signals_queue_full") \
X(UNWINDING_TIME_ASYNC, "unwinding_ticks_async") \
X(UNWINDING_TIME_JVMTI, "unwinding_ticks_jvmti") \
X(CALLTRACE_STORAGE_DROPPED, "calltrace_storage_dropped_traces") \
Expand Down
11 changes: 10 additions & 1 deletion ddprof-lib/src/main/cpp/event.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,13 @@ class WallClockEpochEvent {
u32 _num_failed_samples;
u32 _num_exited_threads;
u32 _num_permission_denied;
u64 _num_suppressed_sampled_run;

WallClockEpochEvent(u64 start_time)
: _dirty(false), _start_time(start_time), _duration_millis(0),
_num_samplable_threads(0), _num_successful_samples(0),
_num_failed_samples(0), _num_exited_threads(0),
_num_permission_denied(0) {}
_num_permission_denied(0), _num_suppressed_sampled_run(0) {}

bool hasChanged() { return _dirty; }

Expand Down Expand Up @@ -153,13 +154,21 @@ class WallClockEpochEvent {
}
}

void addNumSuppressedSampledRun(u64 n) {
if (n > 0) {
_dirty = true;
_num_suppressed_sampled_run += n;
}
}

void endEpoch(u64 millis) { _duration_millis = millis; }

void clean() { _dirty = false; }

void newEpoch(u64 start_time) {
_dirty = false;
_start_time = start_time;
_num_suppressed_sampled_run = 0;
}
};

Expand Down
1 change: 1 addition & 0 deletions ddprof-lib/src/main/cpp/flightRecorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1762,6 +1762,7 @@ void Recording::recordWallClockEpoch(Buffer *buf, WallClockEpochEvent *event) {
buf->putVar64(event->_num_failed_samples);
buf->putVar64(event->_num_exited_threads);
buf->putVar64(event->_num_permission_denied);
buf->putVar64(event->_num_suppressed_sampled_run);
writeEventSizePrefix(buf, start);
flushIfNeeded(buf);
}
Expand Down
9 changes: 9 additions & 0 deletions ddprof-lib/src/main/cpp/hotspot/vmStructs.inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,21 @@

#include "hotspot/vmStructs.h"
#include "jvmThread.h"
#include "vmEntry.h"

VMThread* VMThread::current() {
// JVMThread::current() is the native thread self pointer. On OpenJ9/Zing it
// is not a HotSpot JavaThread*; only HotSpot may reinterpret it as VMThread*.
if (!VM::isHotspot() || JVMThread::current() == nullptr) {
return nullptr;
}
return VMThread::cast(JVMThread::current());
}

VMThread* VMThread::fromJavaThread(JNIEnv* env, jthread thread) {
if (!VM::isHotspot()) {
return nullptr;
}
assert(_eetop != nullptr);
if (_eetop != nullptr) {
jlong eetop = env->GetLongField(thread, _eetop);
Expand Down
36 changes: 34 additions & 2 deletions ddprof-lib/src/main/cpp/javaApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include "counters.h"
#include "common.h"
#include "engine.h"
#include "hotspot/vmStructs.h"
#include "hotspot/vmStructs.inline.h"
#include "incbin.h"
#include "jvmThread.h"
#include "os.h"
Expand Down Expand Up @@ -155,10 +155,13 @@ JavaCritical_com_datadoghq_profiler_JavaProfiler_filterThreadAdd0() {
slot_id = thread_filter->registerThread();
current->setFilterSlotId(slot_id);
}

if (unlikely(slot_id == -1)) {
return; // Failed to register thread
}
// Reset suppression state so a new thread occupying this slot does not inherit
// stale state from its predecessor. Must happen before add().
thread_filter->resetSlotRunState(slot_id);
thread_filter->add(tid, slot_id);
}

Expand Down Expand Up @@ -314,6 +317,35 @@ Java_com_datadoghq_profiler_JavaProfiler_recordQueueEnd0(
Profiler::instance()->recordQueueTime(tid, &event);
}

extern "C" DLLEXPORT void JNICALL
Java_com_datadoghq_profiler_JavaProfiler_parkEnter0(JNIEnv *env, jclass unused) {
ProfiledThread *current = ProfiledThread::current();
if (current == nullptr) {
return;
}
current->parkEnter(TSC::ticks());
ThreadFilter *tf = Profiler::instance()->threadFilter();
if (tf->enabled()) {
tf->enterBlockedRun(current->filterSlotId(), OSThreadState::CONDVAR_WAIT);
}
}

extern "C" DLLEXPORT void JNICALL
Java_com_datadoghq_profiler_JavaProfiler_parkExit0(
JNIEnv *env, jclass unused, jlong blocker, jlong unblockingSpanId) {
ProfiledThread *current = ProfiledThread::current();
if (current == nullptr) {
return;
}
u64 start_ticks = 0;
Context park_context = {};
current->parkExit(start_ticks, park_context);
ThreadFilter *tf = Profiler::instance()->threadFilter();
if (tf->enabled()) {
tf->exitBlockedRun(current->filterSlotId());
}
}

extern "C" DLLEXPORT jlong JNICALL
Java_com_datadoghq_profiler_JavaProfiler_currentTicks0(JNIEnv *env,
jclass unused) {
Expand Down
4 changes: 3 additions & 1 deletion ddprof-lib/src/main/cpp/jfrMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ void JfrMetadata::initialize(
<< field("numExitedThreads", T_INT,
"Number of Exited Threads Before Handling Signal")
<< field("numPermissionDenied", T_INT,
"Number of Permission Denied Errors"))
"Number of Permission Denied Errors")
<< field("numSuppressedSampledRun", T_LONG,
"Signals suppressed by the wall-clock once-per-run filter"))

<< (type("datadog.ObjectSample", T_ALLOC, "Allocation sample")
<< category("Datadog", "Profiling")
Expand Down
10 changes: 5 additions & 5 deletions ddprof-lib/src/main/cpp/livenessTracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,12 +216,10 @@ Error LivenessTracker::initialize(Arguments &args) {
return Error::OK;
}

// _record_heap_usage controls per-session JFR event emission only, not the
// tracking table. Update it before the _initialized guard so each profiler
// start gets the correct setting even when the table persists across recordings.
_record_heap_usage = args._record_heap_usage;

if (_initialized) {
// Once heap usage recording has been enabled it stays on for the JVM lifetime,
// so a later recording without ':L' does not silently drop HeapUsage events.
_record_heap_usage = _record_heap_usage || args._record_heap_usage;
// if the tracker was previously initialized return the stored result for
// consistency this hack also means that if the profiler is started with
// different arguments for liveness tracking those will be ignored it is
Expand Down Expand Up @@ -270,6 +268,8 @@ Error LivenessTracker::initialize(Arguments &args) {
// enough for 1G of heap
_table = (TrackingEntry *)malloc(sizeof(TrackingEntry) * _table_cap);

_record_heap_usage = args._record_heap_usage;

_gc_epoch = 0;
_last_gc_epoch = 0;

Expand Down
9 changes: 6 additions & 3 deletions ddprof-lib/src/main/cpp/profiler.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright The async-profiler authors
* Copyright 2024, 2025 Datadog, Inc
* Copyright 2024, 2026 Datadog, Inc
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -80,6 +80,7 @@ void Profiler::onThreadStart(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread) {
if (_thread_filter.enabled()) {
int slot_id = _thread_filter.registerThread();
current->setFilterSlotId(slot_id);
_thread_filter.resetSlotRunState(slot_id);
_thread_filter.remove(slot_id); // Remove from filtering initially
}
if (thread != NULL) {
Expand Down Expand Up @@ -550,7 +551,7 @@ void Profiler::recordDeferredSample(int tid, u64 call_trace_id, jint event_type,
_locks[lock_index].unlock();
}

void Profiler::recordSample(void *ucontext, u64 counter, int tid,
bool Profiler::recordSample(void *ucontext, u64 counter, int tid,
jint event_type, u64 call_trace_id, Event *event) {
atomicIncRelaxed(_total_samples);

Expand All @@ -566,7 +567,7 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid,
// collected trace
PerfEvents::resetBuffer(tid);
}
return;
return false;
}

bool truncated = false;
Expand Down Expand Up @@ -616,6 +617,7 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid,
_jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event);

_locks[lock_index].unlock();
return true;
}

void Profiler::recordSampleDelegated(void *ucontext, u64 weight, int tid,
Expand Down Expand Up @@ -1203,6 +1205,7 @@ Error Profiler::start(Arguments &args, bool reset) {
_omit_stacktraces = args._lightweight;
_remote_symbolication = args._remote_symbolication;
_libs->setRemoteSymbolication(_remote_symbolication);
_wall_precheck = args._wall_precheck;
_event_mask =
((args._event != NULL && strcmp(args._event, EVENT_NOOP) != 0) ? EM_CPU
: 0) |
Expand Down
5 changes: 3 additions & 2 deletions ddprof-lib/src/main/cpp/profiler.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright The async-profiler authors
* Copyright 2025, Datadog, Inc.
* Copyright 2025, 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -104,6 +104,7 @@ class alignas(alignof(SpinLock)) Profiler {
// ASGCT paths) or _total_samples (written by every recording path).
alignas(DEFAULT_CACHE_LINE_SIZE) volatile u64 _sample_seq;
alignas(DEFAULT_CACHE_LINE_SIZE) u64 _failures[ASGCT_FAILURE_TYPES];
bool _wall_precheck = false;

SpinLock _class_map_lock;
SpinLock _locks[CONCURRENCY_LEVEL];
Expand Down Expand Up @@ -367,7 +368,7 @@ class alignas(alignof(SpinLock)) Profiler {
NativeFrameResolution resolveNativeFrameForWalkVM(uintptr_t pc, int lock_index);
int convertNativeTrace(int native_frames, const void **callchain,
ASGCT_CallFrame *frames, int lock_index);
void recordSample(void *ucontext, u64 weight, int tid, jint event_type,
bool recordSample(void *ucontext, u64 weight, int tid, jint event_type,
u64 call_trace_id, Event *event);
// Delegated sample path: stack-walking is performed by the HotSpot JFR
// RequestStackTrace extension (the JVM emits the stack trace into its own
Expand Down
1 change: 0 additions & 1 deletion ddprof-lib/src/main/cpp/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ void ProfiledThread::release() {
}
}


int ProfiledThread::currentTid() {
ProfiledThread *tls = current();
if (tls != NULL) {
Expand Down
48 changes: 39 additions & 9 deletions ddprof-lib/src/main/cpp/thread.h
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
/*
* Copyright 2025, Datadog, Inc.
* Copyright 2025, 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

#ifndef _THREAD_H
#define _THREAD_H

#include "context.h"
#include "context_api.h"
#include "otel_context.h"
#include "os.h"
#include "threadLocalData.h"
#include "threadState.h"
#include "unwindStats.h"
#include <atomic>
#include <cstdint>
Expand All @@ -19,7 +21,7 @@
#include <sys/types.h>
#include <vector>

class ProfiledThread : public ThreadLocalData {
class ProfiledThread : public ThreadLocalData {
public:
enum ThreadType : u32 {
TYPE_UNKNOWN = 0,
Expand All @@ -28,6 +30,8 @@ class ProfiledThread : public ThreadLocalData {
TYPE_MASK = TYPE_JAVA_THREAD | TYPE_NOT_JAVA_THREAD
};

static constexpr u32 FLAG_PARKED = 0x4u; // next free bit after TYPE_MASK (0x1|0x2)

private:
// We are allowing several levels of nesting because we can be
// eg. in a crash handler when wallclock signal kicks in,
Expand All @@ -53,6 +57,8 @@ class ProfiledThread : public ThreadLocalData {
u64 _call_trace_id;
u32 _recording_epoch;
u32 _misc_flags;
u64 _park_start_ticks;
Context _park_context;
int _filter_slot_id; // Slot ID for thread filtering
uint8_t _init_window; // Countdown for JVM thread init race window (PROF-13072)
uint8_t _signal_depth; // Nested signal-handler depth (see SignalHandlerScope)
Expand All @@ -73,7 +79,9 @@ class ProfiledThread : public ThreadLocalData {

ProfiledThread(int tid)
: ThreadLocalData(), _pc(0), _sp(0), _span_id(0), _crash_depth(0), _tid(tid), _cpu_epoch(0),
_wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0), _filter_slot_id(-1), _init_window(0),
_wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0),
_park_start_ticks(0), _park_context{},
_filter_slot_id(-1), _init_window(0),
_signal_depth(0),
_otel_ctx_initialized(false), _crash_protection_active(false),
_otel_ctx_record{}, _otel_tag_encodings{}, _otel_local_root_span_id(0) {};
Expand Down Expand Up @@ -228,13 +236,17 @@ class ProfiledThread : public ThreadLocalData {
return &_otel_ctx_record;
}

// Record java thread state
// CAS RMW to update only TYPE_MASK bits without clobbering FLAG_PARKED set concurrently
// by parkEnter/parkExit running in signal-handler context on the same thread.
inline void setJavaThread(bool is_java) {
if (is_java) {
_misc_flags = ((_misc_flags & ~TYPE_MASK) | TYPE_JAVA_THREAD);
} else {
_misc_flags = ((_misc_flags & ~TYPE_MASK) | TYPE_NOT_JAVA_THREAD);
}
const u32 type_bits = is_java ? static_cast<u32>(TYPE_JAVA_THREAD) : static_cast<u32>(TYPE_NOT_JAVA_THREAD);
u32 cur = __atomic_load_n(&_misc_flags, __ATOMIC_RELAXED);
u32 desired;
do {
desired = (cur & ~static_cast<u32>(TYPE_MASK)) | type_bits;
} while (!__atomic_compare_exchange_n(&_misc_flags, &cur, desired,
/*weak=*/true,
__ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
}

inline enum ThreadType threadType() const {
Expand All @@ -257,6 +269,24 @@ class ProfiledThread : public ThreadLocalData {
_otel_local_root_span_id = 0;
}

inline void parkEnter(u64 start_ticks) {
_park_context = ContextApi::snapshot();
_park_start_ticks = start_ticks;
// Release ensures _park_start_ticks/_park_context are visible before FLAG_PARKED is set.
__atomic_fetch_or(&_misc_flags, FLAG_PARKED, __ATOMIC_RELEASE);
}

// Returns false if the thread was not parked (idempotent).
inline bool parkExit(u64 &start_ticks, Context &park_context) {
u32 prev = __atomic_fetch_and(&_misc_flags, ~FLAG_PARKED, __ATOMIC_ACQ_REL);
if ((prev & FLAG_PARKED) == 0) {
return false;
}
start_ticks = _park_start_ticks;
park_context = _park_context;
return true;
}

Context snapshotContext(size_t numAttrs);

private:
Expand Down
Loading
Loading