diff --git a/.changeset/logger-handler-caps.md b/.changeset/logger-handler-caps.md
new file mode 100644
index 0000000000..0e8dc4ccde
--- /dev/null
+++ b/.changeset/logger-handler-caps.md
@@ -0,0 +1,9 @@
+---
+'@core/sync-service': patch
+---
+
+Cap the overload-protection (OLP) mailboxes of the default console, OpenTelemetry, and Sentry logger handlers so error/log bursts shed messages instead of blocking Logger callers or growing unbounded.
+
+This is **leading-edge protection only**: it shields against the early phase of a redeployment/error burst but is **not sufficient under deep scheduler starvation** — the real fix for that is upstream (request-proxy admission control and snapshot-pool sizing).
+
+Note: `sync_mode_qlen` is intentionally not set on the OpenTelemetry log handler — its module forces `sync_mode_qlen == drop_mode_qlen`, so the option would be a no-op there.
diff --git a/packages/sync-service/config/runtime.exs b/packages/sync-service/config/runtime.exs
index 955218d8e0..6dbbefcb59 100644
--- a/packages/sync-service/config/runtime.exs
+++ b/packages/sync-service/config/runtime.exs
@@ -24,6 +24,12 @@ config :logger, :default_formatter,
   metadata: [:pid, :shape_handle, :request_id],
   colors: [enabled: env!("ELECTRIC_LOG_COLORS", :boolean!, true)]
 
+# The default logger_std_h handler writes application logs to stdout. Cap its OLP
+# mailbox so error bursts shed logs rather than grow the queue unbounded. Equal
+# sync/drop thresholds disable sync mode (per OTP logger docs), so callers don't block.
+config :logger, :default_handler,
+  config: %{sync_mode_qlen: 2000, drop_mode_qlen: 2000, flush_qlen: 5000}
+
 # Enable this to get **very noisy** but useful messages from BEAM about
 # processes being started, stopped and crashes.
 # https://www.erlang.org/doc/apps/sasl/error_logging#sasl-reports
@@ -374,6 +380,12 @@ if Electric.telemetry_enabled?() do
        %{
          config: %{
            resource: %{name: "logs"},
+           # Cap the OLP mailbox so log bursts shed events instead of letting
+           # the queue grow unbounded. `sync_mode_qlen` is intentionally omitted:
+           # OtelMetricExporter.LogHandler forces `sync_mode_qlen == drop_mode_qlen`,
+           # so setting it is a no-op.
+           drop_mode_qlen: 2000,
+           flush_qlen: 5000,
            metadata_map: %{
              request_id: "http.request_id",
              stack_id: "source_id",
diff --git a/packages/sync-service/lib/electric/application.ex b/packages/sync-service/lib/electric/application.ex
index 2221552035..c46b55c7b9 100644
--- a/packages/sync-service/lib/electric/application.ex
+++ b/packages/sync-service/lib/electric/application.ex
@@ -43,7 +43,15 @@ defmodule Electric.Application do
     Logger.add_handlers(:electric)
 
     if Code.ensure_loaded?(Electric.Telemetry.Sentry) do
-      Electric.Telemetry.Sentry.add_logger_handler()
+      # Cap the Sentry transport sender backlog to shed load instead of letting
+      # queued Sentry events grow unbounded during error bursts.
+      # `sync_threshold: nil` disables the default sync-mode switch (which would
+      # block the logging process) so we rely solely on discard.
+      Electric.Telemetry.Sentry.add_logger_handler(
+        Electric.Telemetry.Sentry.default_handler_id(),
+        discard_threshold: 2000,
+        sync_threshold: nil
+      )
     end
 
     config = configuration()