From ce59bc20f4af194f4e360fd55db99013655432b9 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Fri, 15 May 2026 15:57:30 +0800 Subject: [PATCH 01/18] *: add keyspace observability labels --- cmd/tidb-server/BUILD.bazel | 9 +- cmd/tidb-server/main.go | 89 +++++++++++ cmd/tidb-server/main_test.go | 57 +++++++ pkg/config/BUILD.bazel | 2 +- pkg/config/config.go | 143 ++++++++++++++++++ pkg/config/config.toml.example | 8 + pkg/config/config.toml.nextgen.example | 8 + pkg/config/config_test.go | 114 ++++++++++++++ pkg/metrics/common/wrapper.go | 8 + pkg/sessionctx/variable/slow_log.go | 4 + pkg/sessionctx/variable/tests/session_test.go | 14 ++ pkg/util/metricsutil/BUILD.bazel | 16 +- pkg/util/metricsutil/common.go | 74 +++------ pkg/util/metricsutil/common_test.go | 43 ++++++ pkg/util/stmtsummary/v2/BUILD.bazel | 1 + pkg/util/stmtsummary/v2/logger.go | 29 +++- pkg/util/stmtsummary/v2/record_test.go | 20 +++ 17 files changed, 583 insertions(+), 56 deletions(-) create mode 100644 pkg/util/metricsutil/common_test.go diff --git a/cmd/tidb-server/BUILD.bazel b/cmd/tidb-server/BUILD.bazel index 4ff8cdac2f33f..d163d7a3981cf 100644 --- a/cmd/tidb-server/BUILD.bazel +++ b/cmd/tidb-server/BUILD.bazel @@ -19,6 +19,7 @@ go_library( "//pkg/keyspace", "//pkg/kv", "//pkg/metrics", + "//pkg/metrics/common", "//pkg/parser/mysql", "//pkg/parser/terror", "//pkg/parser/types", @@ -67,11 +68,16 @@ go_library( "@com_github_opentracing_opentracing_go//:opentracing-go", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", + "@com_github_pingcap_kvproto//pkg/keyspacepb", "@com_github_pingcap_log//:log", "@com_github_prometheus_client_golang//prometheus", "@com_github_prometheus_client_golang//prometheus/push", + "@com_github_tikv_client_go_v2//config", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_client_go_v2//txnkv/transaction", + "@com_github_tikv_pd_client//:client", + "@com_github_tikv_pd_client//opt", + "@com_github_tikv_pd_client//pkg/caller", "@org_uber_go_automaxprocs//maxprocs", "@org_uber_go_zap//:zap", ], @@ -107,7 +113,7 @@ go_test( srcs = ["main_test.go"], embed = [":tidb-server_lib"], flaky = True, - shard_count = 6, + shard_count = 9, deps = [ "//pkg/config", "//pkg/config/deploymode", @@ -116,6 +122,7 @@ go_test( "//pkg/sessionctx/vardef", "//pkg/sessionctx/variable", "//pkg/testkit/testsetup", + "@com_github_pingcap_kvproto//pkg/keyspacepb", "@com_github_stretchr_testify//require", "@io_opencensus_go//stats/view", "@org_uber_go_goleak//:goleak", diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 19593cc379599..62ac3c91c3383 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -19,6 +19,7 @@ import ( "flag" "fmt" "io/fs" + "maps" "os" "runtime" "strconv" @@ -30,6 +31,7 @@ import ( "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/log" "github.com/pingcap/tidb/pkg/bindinfo" "github.com/pingcap/tidb/pkg/config" @@ -44,6 +46,7 @@ import ( "github.com/pingcap/tidb/pkg/keyspace" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/metrics" + metricscommon "github.com/pingcap/tidb/pkg/metrics/common" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/parser/terror" parsertypes "github.com/pingcap/tidb/pkg/parser/types" @@ -90,8 +93,12 @@ import ( repository "github.com/pingcap/tidb/pkg/util/workloadrepo" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/push" + tikvconfig "github.com/tikv/client-go/v2/config" "github.com/tikv/client-go/v2/tikv" "github.com/tikv/client-go/v2/txnkv/transaction" + pd "github.com/tikv/pd/client" + "github.com/tikv/pd/client/opt" + "github.com/tikv/pd/client/pkg/caller" "go.uber.org/automaxprocs/maxprocs" "go.uber.org/zap" ) @@ -335,6 +342,8 @@ func main() { signal.SetupUSR1Handler() err = registerStores() terror.MustNil(err) + err = prepareKeyspaceObservability() + terror.MustNil(err) err = metricsutil.RegisterMetrics() terror.MustNil(err) @@ -1146,6 +1155,86 @@ func closeStmtSummary() { } } +var keyspaceMetaComponentName = caller.Component("tidb-keyspace-meta") + +const ( + keyspaceIDMetricLabel = "keyspace_id" + keyspaceNameMetricLabel = "keyspace_name" +) + +func prepareKeyspaceObservability() error { + cfg := config.GetGlobalConfig() + if !kerneltype.IsNextGen() { + return nil + } + if keyspace.IsKeyspaceNameEmpty(cfg.KeyspaceName) || cfg.Store != config.StoreTypeTiKV { + return nil + } + metricscommon.SetConstLabels(keyspaceNameMetricLabel, cfg.KeyspaceName) + pdAddrs, _, _, err := tikvconfig.ParsePath("tikv://" + cfg.Path) + if err != nil { + return err + } + timeoutSec := time.Duration(cfg.PDClient.PDServerTimeout) * time.Second + pdCli, err := pd.NewClient(keyspaceMetaComponentName, pdAddrs, pd.SecurityOption{ + CAPath: cfg.Security.ClusterSSLCA, + CertPath: cfg.Security.ClusterSSLCert, + KeyPath: cfg.Security.ClusterSSLKey, + }, opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels())) + if err != nil { + return err + } + defer pdCli.Close() + + keyspaceMeta, err := getKeyspaceMeta(pdCli, cfg.KeyspaceName) + if err != nil { + return err + } + return prepareKeyspaceObservabilityWithKeyspaceMeta(keyspaceMeta, cfg.KeyspaceName, deploymode.IsStarter()) +} + +func getKeyspaceMeta(pdCli pd.Client, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) { + var keyspaceMeta *keyspacepb.KeyspaceMeta + err := util.RunWithRetry(util.DefaultMaxRetries, util.RetryInterval, func() (bool, error) { + var errInner error + keyspaceMeta, errInner = pdCli.LoadKeyspace(context.TODO(), keyspaceName) + if kvstore.IsNotBootstrappedError(errInner) || kvstore.IsKeyspaceNotExistError(errInner) { + return true, errInner + } + return false, errInner + }) + if err != nil { + return nil, err + } + return keyspaceMeta, nil +} + +func prepareKeyspaceObservabilityWithKeyspaceMeta(keyspaceMeta *keyspacepb.KeyspaceMeta, keyspaceName string, includeConfiguredFields bool) error { + if keyspaceMeta == nil { + return nil + } + resolvedValues := config.KeyspaceObservabilityValues{ + MetricLabels: map[string]string{ + keyspaceIDMetricLabel: fmt.Sprint(keyspaceMeta.GetId()), + keyspaceNameMetricLabel: keyspaceName, + }, + } + if includeConfiguredFields { + copiedConfig := *config.GetGlobalConfig() + if err := copiedConfig.ResolveKeyspaceObservability(keyspaceMeta.GetConfig()); err != nil { + return err + } + configuredValues := copiedConfig.KeyspaceObservabilityValues.Clone() + maps.Copy(resolvedValues.MetricLabels, configuredValues.MetricLabels) + resolvedValues.SlowLogFields = configuredValues.SlowLogFields + resolvedValues.StmtLogFields = configuredValues.StmtLogFields + } + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservabilityValues = resolvedValues + }) + return nil +} + func enablePyroscope() { if os.Getenv("PYROSCOPE_SERVER_ADDRESS") != "" { runtime.SetMutexProfileFraction(5) diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 06aeafea45487..4f3033c381701 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -18,6 +18,7 @@ import ( "os" "testing" + "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/config/deploymode" "github.com/pingcap/tidb/pkg/config/kerneltype" @@ -154,3 +155,59 @@ func TestSetVersionByConfigNormalizeLegacyPlaceholderForNextGen(t *testing.T) { require.Equal(t, "v26.3.0", mysql.TiDBReleaseVersion) require.Equal(t, "8.0.11-TiDB-CLOUD.202603.0", mysql.ServerVersion) } + +func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { + restore := config.RestoreFunc() + defer restore() + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservability = config.KeyspaceObservability{ + Fields: []config.KeyspaceObservabilityField{{ + Source: "meta_a", + MetricLabel: "label_a", + SlowLogField: "Slow_meta_a", + StmtLogField: "stmt_meta_a", + Required: true, + }}, + } + }) + + err := prepareKeyspaceObservabilityWithKeyspaceMeta(&keyspacepb.KeyspaceMeta{ + Id: 42, + Config: map[string]string{"meta_a": "value_a"}, + }, "ks", true) + require.NoError(t, err) + + cfg := config.GetGlobalConfig() + require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks", "label_a": "value_a"}, cfg.GetKeyspaceObservabilityMetricLabels()) + require.Equal(t, []config.KeyspaceObservabilityFieldPair{{Key: "Slow_meta_a", Value: "value_a"}}, cfg.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, []config.KeyspaceObservabilityFieldPair{{Key: "stmt_meta_a", Value: "value_a"}}, cfg.GetKeyspaceObservabilityStmtLogFields()) +} + +func TestSetupKeyspaceObservabilityForNonStarter(t *testing.T) { + restore := config.RestoreFunc() + defer restore() + + err := prepareKeyspaceObservabilityWithKeyspaceMeta(&keyspacepb.KeyspaceMeta{ + Id: 42, + Config: map[string]string{"meta_a": "value_a"}, + }, "ks", false) + require.NoError(t, err) + + cfg := config.GetGlobalConfig() + require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks"}, cfg.GetKeyspaceObservabilityMetricLabels()) + require.Empty(t, cfg.GetKeyspaceObservabilitySlowLogFields()) + require.Empty(t, cfg.GetKeyspaceObservabilityStmtLogFields()) +} + +func TestSetupKeyspaceObservabilityForStartSkipsClassic(t *testing.T) { + restore := config.RestoreFunc() + defer restore() + config.UpdateGlobal(func(conf *config.Config) { + conf.Store = config.StoreTypeTiKV + conf.Path = "invalid-pd-path" + conf.KeyspaceName = "test_keyspace" + }) + + require.NoError(t, prepareKeyspaceObservability()) + require.Empty(t, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) +} diff --git a/pkg/config/BUILD.bazel b/pkg/config/BUILD.bazel index 57b2c3b89a90c..5ab743a7d063b 100644 --- a/pkg/config/BUILD.bazel +++ b/pkg/config/BUILD.bazel @@ -42,7 +42,7 @@ go_test( data = glob(["**"]), embed = [":config"], flaky = True, - shard_count = 32, + shard_count = 34, deps = [ "//pkg/config/deploymode", "//pkg/config/kerneltype", diff --git a/pkg/config/config.go b/pkg/config/config.go index a069b87e82ef2..764e2287f2fc9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -262,6 +262,9 @@ type Config struct { // key will be the default value of the session variable `txn_scope` for this tidb-server. Labels map[string]string `toml:"labels" json:"labels"` + KeyspaceObservability KeyspaceObservability `toml:"keyspace-observability" json:"keyspace-observability"` + KeyspaceObservabilityValues KeyspaceObservabilityValues `toml:"-" json:"-"` + // EnableGlobalIndex is deprecated. EnableGlobalIndex bool `toml:"enable-global-index" json:"enable-global-index"` @@ -433,6 +436,143 @@ func encodeDefTempStorageDir(tempDir string, host, statusHost string, port, stat return filepath.Join(tempDir, osUID+"_tidb", dirName, "tmp-storage") } +// KeyspaceObservability maps metadata entries to observability outputs. +type KeyspaceObservability struct { + Fields []KeyspaceObservabilityField `toml:"fields" json:"fields"` +} + +// KeyspaceObservabilityField describes one metadata entry mapping. +type KeyspaceObservabilityField struct { + Source string `toml:"source" json:"source"` + MetricLabel string `toml:"metric-label" json:"metric-label,omitempty"` + SlowLogField string `toml:"slow-log-field" json:"slow-log-field,omitempty"` + StmtLogField string `toml:"stmt-log-field" json:"stmt-log-field,omitempty"` + Required bool `toml:"required" json:"required"` +} + +// KeyspaceObservabilityValues stores resolved metadata values. +type KeyspaceObservabilityValues struct { + MetricLabels map[string]string `toml:"-" json:"-"` + SlowLogFields []KeyspaceObservabilityFieldPair `toml:"-" json:"-"` + StmtLogFields []KeyspaceObservabilityFieldPair `toml:"-" json:"-"` +} + +// KeyspaceObservabilityFieldPair stores one resolved output field. +type KeyspaceObservabilityFieldPair struct { + Key string + Value string +} + +// Valid validates metadata observability mappings. +func (o KeyspaceObservability) Valid() error { + metricLabels := make(map[string]struct{}, len(o.Fields)) + slowLogFields := make(map[string]struct{}, len(o.Fields)) + stmtLogFields := make(map[string]struct{}, len(o.Fields)) + for i, field := range o.Fields { + if field.Source == "" { + return fmt.Errorf("[keyspace-observability.fields.%d] source cannot be empty", i) + } + if field.MetricLabel == "" && field.SlowLogField == "" && field.StmtLogField == "" { + return fmt.Errorf("[keyspace-observability.fields.%d] at least one output must be set", i) + } + if field.MetricLabel != "" { + if !validPrometheusLabelName(field.MetricLabel) { + return fmt.Errorf("[keyspace-observability.fields.%d] invalid metric-label %q", i, field.MetricLabel) + } + key := strings.ToLower(field.MetricLabel) + if _, ok := metricLabels[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated metric-label %q", i, field.MetricLabel) + } + metricLabels[key] = struct{}{} + } + if field.SlowLogField != "" { + if _, ok := slowLogFields[field.SlowLogField]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated slow-log-field %q", i, field.SlowLogField) + } + slowLogFields[field.SlowLogField] = struct{}{} + } + if field.StmtLogField != "" { + if _, ok := stmtLogFields[field.StmtLogField]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated stmt-log-field %q", i, field.StmtLogField) + } + stmtLogFields[field.StmtLogField] = struct{}{} + } + } + return nil +} + +func validPrometheusLabelName(label string) bool { + for i, r := range label { + if i == 0 { + if r == '_' || r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' { + continue + } + return false + } + if r == '_' || r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' || r >= '0' && r <= '9' { + continue + } + return false + } + return label != "" +} + +// ResolveKeyspaceObservability resolves configured output values from metadata. +func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { + resolved := KeyspaceObservabilityValues{ + MetricLabels: make(map[string]string), + } + for _, field := range c.KeyspaceObservability.Fields { + value, ok := values[field.Source] + if !ok { + if field.Required { + return fmt.Errorf("missing required keyspace metadata entry %q", field.Source) + } + continue + } + if field.MetricLabel != "" { + resolved.MetricLabels[field.MetricLabel] = value + } + if field.SlowLogField != "" { + resolved.SlowLogFields = append(resolved.SlowLogFields, KeyspaceObservabilityFieldPair{Key: field.SlowLogField, Value: value}) + } + if field.StmtLogField != "" { + resolved.StmtLogFields = append(resolved.StmtLogFields, KeyspaceObservabilityFieldPair{Key: field.StmtLogField, Value: value}) + } + } + c.KeyspaceObservabilityValues = resolved.Clone() + return nil +} + +// Clone returns a deep copy of resolved metadata observability values. +func (v KeyspaceObservabilityValues) Clone() KeyspaceObservabilityValues { + res := KeyspaceObservabilityValues{} + if len(v.MetricLabels) > 0 { + res.MetricLabels = make(map[string]string, len(v.MetricLabels)) + for k, value := range v.MetricLabels { + res.MetricLabels[k] = value + } + } + res.SlowLogFields = append([]KeyspaceObservabilityFieldPair(nil), v.SlowLogFields...) + res.StmtLogFields = append([]KeyspaceObservabilityFieldPair(nil), v.StmtLogFields...) + return res +} + +// GetKeyspaceObservabilityMetricLabels returns resolved metric labels. +func (c *Config) GetKeyspaceObservabilityMetricLabels() map[string]string { + return c.KeyspaceObservabilityValues.Clone().MetricLabels +} + +// GetKeyspaceObservabilitySlowLogFields returns resolved slow log fields. +func (c *Config) GetKeyspaceObservabilitySlowLogFields() []KeyspaceObservabilityFieldPair { + return c.KeyspaceObservabilityValues.Clone().SlowLogFields +} + +// GetKeyspaceObservabilityStmtLogFields returns resolved statement log fields. +func (c *Config) GetKeyspaceObservabilityStmtLogFields() []KeyspaceObservabilityFieldPair { + return c.KeyspaceObservabilityValues.Clone().StmtLogFields +} + // nullableBool defaults unset bool options to unset instead of false, which enables us to know if the user has set 2 // conflict options at the same time. type nullableBool struct { @@ -1479,6 +1619,9 @@ func (c *Config) Valid() error { if c.DXFResourceLimit != DefDXFResourceLimit && c.DeployMode != deploymode.PremiumReserved { return fmt.Errorf("dxf-resource-limit can only be configured when deploy-mode is premium_reserved") } + if err := c.KeyspaceObservability.Valid(); err != nil { + return err + } if c.Store == StoreTypeMockTiKV && !c.Instance.TiDBEnableDDL.Load() { return fmt.Errorf("can't disable DDL on mocktikv") } diff --git a/pkg/config/config.toml.example b/pkg/config/config.toml.example index 70d5f1ad6916d..0cc634783250d 100644 --- a/pkg/config/config.toml.example +++ b/pkg/config/config.toml.example @@ -480,6 +480,14 @@ tikv-raftstore-store-write-trigger-wb-bytes = 0.00006100 tikv-storage-processed-keys-batch-get = 0.00266791 tikv-storage-processed-keys-get = 0.01416829 +# Map selected keyspace metadata entries to observability outputs. +# [[keyspace-observability.fields]] +# source = "meta_key" +# metric-label = "metric_label" +# slow-log-field = "Slow_log_field" +# stmt-log-field = "stmt_log_field" +# required = false + # instance scope variables # These options are also available as a system variable for online configuration # changes to the system variable do not persist to the cluster. You must make changes diff --git a/pkg/config/config.toml.nextgen.example b/pkg/config/config.toml.nextgen.example index f8ad031a58d92..39bb18bf4d7cf 100644 --- a/pkg/config/config.toml.nextgen.example +++ b/pkg/config/config.toml.nextgen.example @@ -446,6 +446,14 @@ allow-expression-index = false # engines means allow the tidb server read data from which types of engines. options: "tikv", "tiflash", "tidb". engines = ["tikv", "tiflash", "tidb"] +# Map selected keyspace metadata entries to observability outputs. +# [[keyspace-observability.fields]] +# source = "meta_key" +# metric-label = "metric_label" +# slow-log-field = "Slow_log_field" +# stmt-log-field = "stmt_log_field" +# required = false + # instance scope variables # These options are also available as a system variable for online configuration # changes to the system variable do not persist to the cluster. You must make changes diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 5628ff4a6242d..856ea1a49f4ff 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -170,6 +170,120 @@ disable-error-stack = false `, nbFalse, nbUnset, nbUnset, nbUnset, false, true) } +func TestKeyspaceObservability(t *testing.T) { + conf := NewConfig() + content := ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "label_a" +slow-log-field = "Slow_meta_a" +stmt-log-field = "stmt_meta_a" +required = true + +[[keyspace-observability.fields]] +source = "meta_b" +metric-label = "label_b" +` + _, err := toml.Decode(content, conf) + require.NoError(t, err) + require.NoError(t, conf.Valid()) + require.NoError(t, conf.ResolveKeyspaceObservability(map[string]string{ + "meta_a": "value_a", + "meta_b": "value_b", + })) + require.Equal(t, map[string]string{"label_a": "value_a", "label_b": "value_b"}, conf.GetKeyspaceObservabilityMetricLabels()) + require.Equal(t, []KeyspaceObservabilityFieldPair{{Key: "Slow_meta_a", Value: "value_a"}}, conf.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, []KeyspaceObservabilityFieldPair{{Key: "stmt_meta_a", Value: "value_a"}}, conf.GetKeyspaceObservabilityStmtLogFields()) + + metricLabels := conf.GetKeyspaceObservabilityMetricLabels() + metricLabels["label_a"] = "changed" + require.Equal(t, "value_a", conf.GetKeyspaceObservabilityMetricLabels()["label_a"]) + + require.ErrorContains(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_b": "value_b"}), `missing required keyspace metadata entry "meta_a"`) +} + +func TestKeyspaceObservabilityInvalid(t *testing.T) { + tests := []struct { + name string + content string + err string + }{ + { + name: "empty source", + content: ` +[[keyspace-observability.fields]] +source = "" +metric-label = "label_a" +`, + err: "source cannot be empty", + }, + { + name: "empty output", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +`, + err: "at least one output must be set", + }, + { + name: "invalid label", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "1_label" +`, + err: `invalid metric-label "1_label"`, + }, + { + name: "duplicate label", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "label_a" + +[[keyspace-observability.fields]] +source = "meta_b" +metric-label = "LABEL_A" +`, + err: `duplicated metric-label "LABEL_A"`, + }, + { + name: "duplicate slow log field", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +slow-log-field = "Slow_meta" + +[[keyspace-observability.fields]] +source = "meta_b" +slow-log-field = "Slow_meta" +`, + err: `duplicated slow-log-field "Slow_meta"`, + }, + { + name: "duplicate stmt log field", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +stmt-log-field = "stmt_meta" + +[[keyspace-observability.fields]] +source = "meta_b" +stmt-log-field = "stmt_meta" +`, + err: `duplicated stmt-log-field "stmt_meta"`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + conf := NewConfig() + _, err := toml.Decode(tt.content, conf) + require.NoError(t, err) + require.ErrorContains(t, conf.Valid(), tt.err) + }) + } +} + func TestRemovedVariableCheck(t *testing.T) { configTest := []struct { options string diff --git a/pkg/metrics/common/wrapper.go b/pkg/metrics/common/wrapper.go index f668ceb7c7458..d9ab537bffb83 100644 --- a/pkg/metrics/common/wrapper.go +++ b/pkg/metrics/common/wrapper.go @@ -53,6 +53,14 @@ func SetConstLabels(kv ...string) { } } +// SetConstLabelsFromMap sets constant labels for metrics from a map. +func SetConstLabelsFromMap(labels map[string]string) { + constLabels = make(prometheus.Labels, len(labels)) + for k, v := range labels { + constLabels[strings.ToLower(k)] = v + } +} + // NewCounter wraps a prometheus.NewCounter. func NewCounter(opts prometheus.CounterOpts) prometheus.Counter { opts.ConstLabels = constLabels diff --git a/pkg/sessionctx/variable/slow_log.go b/pkg/sessionctx/variable/slow_log.go index 7db2b4b2340b9..c4391885c6857 100644 --- a/pkg/sessionctx/variable/slow_log.go +++ b/pkg/sessionctx/variable/slow_log.go @@ -29,6 +29,7 @@ import ( "time" "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/terror" "github.com/pingcap/tidb/pkg/sessionctx/slowlogrule" @@ -589,6 +590,9 @@ func (s *SessionVars) SlowLogFormat(logItems *SlowQueryLogItems) string { if logItems.PrevStmt != "" { writeSlowLogItem(&buf, SlowLogPrevStmt, logItems.PrevStmt) } + for _, field := range config.GetGlobalConfig().GetKeyspaceObservabilitySlowLogFields() { + writeSlowLogItem(&buf, field.Key, field.Value) + } if s.CurrentDBChanged { fmt.Fprintf(&buf, "use %s;\n", strings.ToLower(s.CurrentDB)) diff --git a/pkg/sessionctx/variable/tests/session_test.go b/pkg/sessionctx/variable/tests/session_test.go index ffa0562724f8e..bcd2713106816 100644 --- a/pkg/sessionctx/variable/tests/session_test.go +++ b/pkg/sessionctx/variable/tests/session_test.go @@ -385,6 +385,20 @@ func TestSlowLogFormat(t *testing.T) { // Restore for subsequent assertions. logItems.SessionConnectAttrs = nil + restore := config.RestoreFunc() + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservability = config.KeyspaceObservability{ + Fields: []config.KeyspaceObservabilityField{{ + Source: "meta_a", + SlowLogField: "Slow_meta_a", + }}, + } + require.NoError(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_a": "value_a"})) + }) + logString = seVar.SlowLogFormat(logItems) + require.Equal(t, resultFields+"\n"+"# Slow_meta_a: value_a\n"+sql, logString) + restore() + // test PrepareSlowLogItemsForRules and CompleteSlowLogItemsForRules seVar.SlowLogRules = slowlogrule.NewSessionSlowLogRules(&slowlogrule.SlowLogRules{ Fields: map[string]struct{}{ diff --git a/pkg/util/metricsutil/BUILD.bazel b/pkg/util/metricsutil/BUILD.bazel index ac57d68fa038b..1640bb09e51e4 100644 --- a/pkg/util/metricsutil/BUILD.bazel +++ b/pkg/util/metricsutil/BUILD.bazel @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "metricsutil", @@ -29,9 +29,21 @@ go_library( "//pkg/util", "//pkg/util/topsql/reporter/metrics", "@com_github_pingcap_kvproto//pkg/keyspacepb", - "@com_github_tikv_client_go_v2//config", "@com_github_tikv_pd_client//:client", "@com_github_tikv_pd_client//opt", "@com_github_tikv_pd_client//pkg/caller", ], ) + +go_test( + name = "metricsutil_test", + timeout = "short", + srcs = ["common_test.go"], + embed = [":metricsutil"], + flaky = True, + deps = [ + "//pkg/config", + "//pkg/metrics/common", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/util/metricsutil/common.go b/pkg/util/metricsutil/common.go index 42e38e31cfff3..cd17f7c1f2739 100644 --- a/pkg/util/metricsutil/common.go +++ b/pkg/util/metricsutil/common.go @@ -17,6 +17,7 @@ package metricsutil import ( "context" "fmt" + "maps" "time" "github.com/pingcap/kvproto/pkg/keyspacepb" @@ -42,7 +43,6 @@ import ( ttlmetrics "github.com/pingcap/tidb/pkg/ttl/metrics" "github.com/pingcap/tidb/pkg/util" topsqlreporter_metrics "github.com/pingcap/tidb/pkg/util/topsql/reporter/metrics" - tikvconfig "github.com/tikv/client-go/v2/config" pd "github.com/tikv/pd/client" "github.com/tikv/pd/client/opt" "github.com/tikv/pd/client/pkg/caller" @@ -50,52 +50,21 @@ import ( var componentName = caller.Component("tidb-metrics-util") -// RegisterMetrics register metrics with const label 'keyspace_id' if keyspaceName set. +const defaultKeyspaceLabel = "keyspace_id" + +// RegisterMetrics registers metrics with keyspace metadata labels when available. func RegisterMetrics() error { cfg := config.GetGlobalConfig() - if keyspace.IsKeyspaceNameEmpty(cfg.KeyspaceName) || cfg.Store != config.StoreTypeTiKV { - registerMetrics(nil) // register metrics without label 'keyspace_id'. - return nil - } - - if kerneltype.IsNextGen() { + if !keyspace.IsKeyspaceNameEmpty(cfg.KeyspaceName) && kerneltype.IsNextGen() { metricscommon.SetConstLabels("keyspace_name", cfg.KeyspaceName) } - - pdAddrs, _, _, err := tikvconfig.ParsePath("tikv://" + cfg.Path) - if err != nil { - return err - } - - timeoutSec := time.Duration(cfg.PDClient.PDServerTimeout) * time.Second - // Note: for NextGen, we need to use the side effect of `NewClient` to init the metrics' builtin const labels - pdCli, err := pd.NewClient(componentName, pdAddrs, pd.SecurityOption{ - CAPath: cfg.Security.ClusterSSLCA, - CertPath: cfg.Security.ClusterSSLCert, - KeyPath: cfg.Security.ClusterSSLKey, - }, opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels())) - if err != nil { - return err - } - defer pdCli.Close() - - if kerneltype.IsNextGen() { - registerMetrics(nil) // metrics' const label already set - } else { - keyspaceMeta, err := getKeyspaceMeta(pdCli, cfg.KeyspaceName) - if err != nil { - return err - } - registerMetrics(keyspaceMeta) - } - return nil + return registerMetrics() } -// RegisterMetricsForBR register metrics with const label keyspace_id for BR. +// RegisterMetricsForBR registers metrics with keyspace metadata labels for BR. func RegisterMetricsForBR(pdAddrs []string, tls task.TLSConfig, keyspaceName string) error { if keyspace.IsKeyspaceNameEmpty(keyspaceName) { - registerMetrics(nil) // register metrics without label 'keyspace_id'. - return nil + return registerMetrics() } if kerneltype.IsNextGen() { @@ -115,16 +84,16 @@ func RegisterMetricsForBR(pdAddrs []string, tls task.TLSConfig, keyspaceName str } defer pdCli.Close() - if kerneltype.IsNextGen() { - registerMetrics(nil) // metrics' const label already set - } else { - keyspaceMeta, err := getKeyspaceMeta(pdCli, keyspaceName) - if err != nil { - return err - } - registerMetrics(keyspaceMeta) + keyspaceMeta, err := getKeyspaceMeta(pdCli, keyspaceName) + if err != nil { + return err } - return nil + if !kerneltype.IsNextGen() { + labels := maps.Clone(metricscommon.GetConstLabels()) + labels[defaultKeyspaceLabel] = fmt.Sprint(keyspaceMeta.GetId()) + metricscommon.SetConstLabelsFromMap(labels) + } + return registerMetrics() } func initMetrics() { @@ -150,11 +119,14 @@ func initMetrics() { } } -func registerMetrics(keyspaceMeta *keyspacepb.KeyspaceMeta) { - if keyspaceMeta != nil { - metricscommon.SetConstLabels("keyspace_id", fmt.Sprint(keyspaceMeta.GetId())) +func registerMetrics() error { + labels := maps.Clone(metricscommon.GetConstLabels()) + maps.Copy(labels, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) + if len(labels) > 0 { + metricscommon.SetConstLabelsFromMap(labels) } initMetrics() + return nil } func getKeyspaceMeta(pdCli pd.Client, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) { diff --git a/pkg/util/metricsutil/common_test.go b/pkg/util/metricsutil/common_test.go new file mode 100644 index 0000000000000..a8f40cea5d82b --- /dev/null +++ b/pkg/util/metricsutil/common_test.go @@ -0,0 +1,43 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metricsutil + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/config" + metricscommon "github.com/pingcap/tidb/pkg/metrics/common" + "github.com/stretchr/testify/require" +) + +func TestRegisterMetricsWithKeyspaceObservabilityValues(t *testing.T) { + restore := config.RestoreFunc() + defer restore() + t.Cleanup(func() { + metricscommon.SetConstLabels() + }) + + metricscommon.SetConstLabels("base_label", "base_value") + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservabilityValues = config.KeyspaceObservabilityValues{ + MetricLabels: map[string]string{"label_a": "value_a"}, + } + }) + + require.NoError(t, registerMetrics()) + labels := metricscommon.GetConstLabels() + require.Equal(t, "base_value", labels["base_label"]) + require.Equal(t, "value_a", labels["label_a"]) +} diff --git a/pkg/util/stmtsummary/v2/BUILD.bazel b/pkg/util/stmtsummary/v2/BUILD.bazel index 1abfaf8d31663..db4ef2262fcd0 100644 --- a/pkg/util/stmtsummary/v2/BUILD.bazel +++ b/pkg/util/stmtsummary/v2/BUILD.bazel @@ -51,6 +51,7 @@ go_test( flaky = True, shard_count = 15, deps = [ + "//pkg/config", "//pkg/meta/model", "//pkg/metrics", "//pkg/parser/ast", diff --git a/pkg/util/stmtsummary/v2/logger.go b/pkg/util/stmtsummary/v2/logger.go index 64c3499c6ba28..f3610d06c411b 100644 --- a/pkg/util/stmtsummary/v2/logger.go +++ b/pkg/util/stmtsummary/v2/logger.go @@ -20,6 +20,7 @@ import ( "time" "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/util/logutil" "go.uber.org/zap" "go.uber.org/zap/buffer" @@ -71,7 +72,7 @@ func (s *stmtLogStorage) sync() error { } func (s *stmtLogStorage) log(r *StmtRecord) { - b, err := json.Marshal(r) + b, err := marshalStmtRecord(r) if err != nil { logutil.BgLogger().Warn("failed to marshal statement summary", zap.Error(err)) return @@ -79,6 +80,32 @@ func (s *stmtLogStorage) log(r *StmtRecord) { s.logger.Info(string(b)) } +func marshalStmtRecord(r *StmtRecord) ([]byte, error) { + fields := config.GetGlobalConfig().GetKeyspaceObservabilityStmtLogFields() + if len(fields) == 0 { + return json.Marshal(r) + } + b, err := json.Marshal(r) + if err != nil { + return nil, err + } + if !json.Valid(b) || len(b) < 2 || b[0] != '{' || b[len(b)-1] != '}' { + return b, nil + } + items := make(map[string]json.RawMessage) + if err := json.Unmarshal(b, &items); err != nil { + return nil, err + } + for _, field := range fields { + value, err := json.Marshal(field.Value) + if err != nil { + return nil, err + } + items[field.Key] = value + } + return json.Marshal(items) +} + type stmtLogEncoder struct{} func (*stmtLogEncoder) EncodeEntry(entry zapcore.Entry, _ []zapcore.Field) (*buffer.Buffer, error) { diff --git a/pkg/util/stmtsummary/v2/record_test.go b/pkg/util/stmtsummary/v2/record_test.go index fbb65feb5468c..9b3700497b4f9 100644 --- a/pkg/util/stmtsummary/v2/record_test.go +++ b/pkg/util/stmtsummary/v2/record_test.go @@ -15,8 +15,10 @@ package stmtsummary import ( + "encoding/json" "testing" + "github.com/pingcap/tidb/pkg/config" "github.com/stretchr/testify/require" ) @@ -83,4 +85,22 @@ func TestStmtRecord(t *testing.T) { require.Equal(t, info.TotalRUV2*2, record2.SumRUV2) require.Equal(t, info.CPUUsages.TidbCPUTime*2, record2.SumTidbCPU) require.Equal(t, info.CPUUsages.TikvCPUTime*2, record2.SumTikvCPU) + + restore := config.RestoreFunc() + defer restore() + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservability = config.KeyspaceObservability{ + Fields: []config.KeyspaceObservabilityField{{ + Source: "meta_a", + StmtLogField: "stmt_meta_a", + }}, + } + require.NoError(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_a": "value_a"})) + }) + b, err := marshalStmtRecord(record2) + require.NoError(t, err) + items := make(map[string]any) + require.NoError(t, json.Unmarshal(b, &items)) + require.Equal(t, "value_a", items["stmt_meta_a"]) + require.Equal(t, record2.Digest, items["digest"]) } From 0b4ee2feab07d0fb9dee6ba5de2a8c3df7e2bfa7 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 18 May 2026 14:20:23 +0800 Subject: [PATCH 02/18] *: address keyspace observability review comments --- cmd/tidb-server/main_test.go | 4 ++++ pkg/config/config.go | 3 +++ pkg/config/config_test.go | 9 +++++++++ pkg/sessionctx/variable/tests/session_test.go | 2 +- pkg/util/metricsutil/common.go | 12 ++++++++++-- pkg/util/metricsutil/common_test.go | 6 +++++- 6 files changed, 32 insertions(+), 4 deletions(-) diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 4f3033c381701..053a9ec419843 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -200,6 +200,10 @@ func TestSetupKeyspaceObservabilityForNonStarter(t *testing.T) { } func TestSetupKeyspaceObservabilityForStartSkipsClassic(t *testing.T) { + if !kerneltype.IsClassic() { + t.Skip("only verifies the classic-mode short-circuit path") + } + restore := config.RestoreFunc() defer restore() config.UpdateGlobal(func(conf *config.Config) { diff --git a/pkg/config/config.go b/pkg/config/config.go index 764e2287f2fc9..7025da614fa09 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -480,6 +480,9 @@ func (o KeyspaceObservability) Valid() error { return fmt.Errorf("[keyspace-observability.fields.%d] invalid metric-label %q", i, field.MetricLabel) } key := strings.ToLower(field.MetricLabel) + if key == "keyspace_id" || key == "keyspace_name" { + return fmt.Errorf("[keyspace-observability.fields.%d] reserved metric-label %q", i, field.MetricLabel) + } if _, ok := metricLabels[key]; ok { return fmt.Errorf("[keyspace-observability.fields.%d] duplicated metric-label %q", i, field.MetricLabel) } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 856ea1a49f4ff..dfc9785c28a4c 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -247,6 +247,15 @@ metric-label = "LABEL_A" `, err: `duplicated metric-label "LABEL_A"`, }, + { + name: "reserved label", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "KEYSPACE_ID" +`, + err: `reserved metric-label "KEYSPACE_ID"`, + }, { name: "duplicate slow log field", content: ` diff --git a/pkg/sessionctx/variable/tests/session_test.go b/pkg/sessionctx/variable/tests/session_test.go index bcd2713106816..a77c4ed466e15 100644 --- a/pkg/sessionctx/variable/tests/session_test.go +++ b/pkg/sessionctx/variable/tests/session_test.go @@ -386,6 +386,7 @@ func TestSlowLogFormat(t *testing.T) { logItems.SessionConnectAttrs = nil restore := config.RestoreFunc() + defer restore() config.UpdateGlobal(func(conf *config.Config) { conf.KeyspaceObservability = config.KeyspaceObservability{ Fields: []config.KeyspaceObservabilityField{{ @@ -397,7 +398,6 @@ func TestSlowLogFormat(t *testing.T) { }) logString = seVar.SlowLogFormat(logItems) require.Equal(t, resultFields+"\n"+"# Slow_meta_a: value_a\n"+sql, logString) - restore() // test PrepareSlowLogItemsForRules and CompleteSlowLogItemsForRules seVar.SlowLogRules = slowlogrule.NewSessionSlowLogRules(&slowlogrule.SlowLogRules{ diff --git a/pkg/util/metricsutil/common.go b/pkg/util/metricsutil/common.go index cd17f7c1f2739..4260895273b38 100644 --- a/pkg/util/metricsutil/common.go +++ b/pkg/util/metricsutil/common.go @@ -89,7 +89,7 @@ func RegisterMetricsForBR(pdAddrs []string, tls task.TLSConfig, keyspaceName str return err } if !kerneltype.IsNextGen() { - labels := maps.Clone(metricscommon.GetConstLabels()) + labels := cloneConstLabels() labels[defaultKeyspaceLabel] = fmt.Sprint(keyspaceMeta.GetId()) metricscommon.SetConstLabelsFromMap(labels) } @@ -120,7 +120,7 @@ func initMetrics() { } func registerMetrics() error { - labels := maps.Clone(metricscommon.GetConstLabels()) + labels := cloneConstLabels() maps.Copy(labels, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) if len(labels) > 0 { metricscommon.SetConstLabelsFromMap(labels) @@ -129,6 +129,14 @@ func registerMetrics() error { return nil } +func cloneConstLabels() map[string]string { + labels := maps.Clone(metricscommon.GetConstLabels()) + if labels == nil { + labels = make(map[string]string) + } + return labels +} + func getKeyspaceMeta(pdCli pd.Client, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) { // Load Keyspace meta with retry. var keyspaceMeta *keyspacepb.KeyspaceMeta diff --git a/pkg/util/metricsutil/common_test.go b/pkg/util/metricsutil/common_test.go index a8f40cea5d82b..96a6d6c727521 100644 --- a/pkg/util/metricsutil/common_test.go +++ b/pkg/util/metricsutil/common_test.go @@ -29,6 +29,10 @@ func TestRegisterMetricsWithKeyspaceObservabilityValues(t *testing.T) { metricscommon.SetConstLabels() }) + labels := cloneConstLabels() + labels["label_a"] = "value_a" + require.Equal(t, "value_a", labels["label_a"]) + metricscommon.SetConstLabels("base_label", "base_value") config.UpdateGlobal(func(conf *config.Config) { conf.KeyspaceObservabilityValues = config.KeyspaceObservabilityValues{ @@ -37,7 +41,7 @@ func TestRegisterMetricsWithKeyspaceObservabilityValues(t *testing.T) { }) require.NoError(t, registerMetrics()) - labels := metricscommon.GetConstLabels() + labels = metricscommon.GetConstLabels() require.Equal(t, "base_value", labels["base_label"]) require.Equal(t, "value_a", labels["label_a"]) } From ae42f5a2f282042e5d2d28eade2b61905db692d4 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 18 May 2026 14:55:15 +0800 Subject: [PATCH 03/18] *: tighten keyspace observability validation --- pkg/config/config.go | 322 ++++++++++++++++++++++++- pkg/config/config.toml.example | 1 + pkg/config/config.toml.nextgen.example | 1 + pkg/config/config_test.go | 60 ++++- 4 files changed, 377 insertions(+), 7 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 7025da614fa09..d0f761f9c9bf7 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -463,6 +463,288 @@ type KeyspaceObservabilityFieldPair struct { Value string } +var reservedKeyspaceObservabilityMetricLabels = map[string]struct{}{ + "keyspace_id": {}, + "keyspace_name": {}, + "account_lock": {}, + "action": {}, + "address": {}, + "cf": {}, + "cipher": {}, + "copr_type": {}, + "cte_type": {}, + "db": {}, + "event": {}, + "has_lock": {}, + "hash": {}, + "in_txn": {}, + "internal": {}, + "module": {}, + "name": {}, + "phase": {}, + "reason": {}, + "resource_group": {}, + "result": {}, + "scope": {}, + "sql_type": {}, + "stage": {}, + "status": {}, + "step": {}, + "store": {}, + "task": {}, + "txn_mode": {}, + "type": {}, + "version": {}, +} + +var reservedKeyspaceObservabilitySlowLogFields = map[string]struct{}{ + "backoff_detail": {}, + "backoff_time": {}, + "backoff_total": {}, + "backoff_types": {}, + "binary_plan": {}, + "commit_backoff_time": {}, + "commit_primary_rpc_detail": {}, + "commit_time": {}, + "compile_time": {}, + "conn_id": {}, + "cop_backoff_": {}, + "cop_mvcc_read_amplification": {}, + "cop_proc_addr": {}, + "cop_proc_avg": {}, + "cop_proc_max": {}, + "cop_proc_p90": {}, + "cop_time": {}, + "cop_wait_addr": {}, + "cop_wait_avg": {}, + "cop_wait_max": {}, + "cop_wait_p90": {}, + "db": {}, + "digest": {}, + "disk_max": {}, + "exec_retry_count": {}, + "exec_retry_time": {}, + "get_commit_ts_time": {}, + "get_latest_ts_time": {}, + "get_snapshot_time": {}, + "has_more_results": {}, + "host": {}, + "index_names": {}, + "is_internal": {}, + "isexplicittxn": {}, + "issyncstatsfailed": {}, + "iswritecachetable": {}, + "keyspace_id": {}, + "keyspace_name": {}, + "kv_total": {}, + "local_latch_wait_time": {}, + "lockkeys_time": {}, + "mem_arbitration": {}, + "mem_max": {}, + "num_cop_tasks": {}, + "opt_binding_match": {}, + "opt_logical": {}, + "opt_physical": {}, + "opt_stats_derive": {}, + "opt_stats_sync_wait": {}, + "optimize_time": {}, + "parse_time": {}, + "pd_total": {}, + "plan": {}, + "plan_digest": {}, + "plan_from_binding": {}, + "plan_from_cache": {}, + "preproc_subqueries": {}, + "preproc_subqueries_time": {}, + "prepared": {}, + "prewrite_backoff_types": {}, + "prewrite_region": {}, + "prewrite_time": {}, + "prev_stmt": {}, + "process_keys": {}, + "process_time": {}, + "query": {}, + "query_time": {}, + "request_count": {}, + "request_unit_read": {}, + "request_unit_v2": {}, + "request_unit_v2_detail": {}, + "request_unit_write": {}, + "resolve_lock_time": {}, + "resource_group": {}, + "result_rows": {}, + "rewrite_time": {}, + "rocksdb_block_cache_hit_count": {}, + "rocksdb_block_read_byte": {}, + "rocksdb_block_read_count": {}, + "rocksdb_block_read_time": {}, + "rocksdb_delete_skipped_count": {}, + "rocksdb_key_skipped_count": {}, + "session_alias": {}, + "session_connect_attrs": {}, + "slowest_prewrite_rpc_detail": {}, + "stats": {}, + "storage_from_kv": {}, + "storage_from_mpp": {}, + "succ": {}, + "tidb_cpu_time": {}, + "tikv_cpu_time": {}, + "time": {}, + "time_queued_by_rc": {}, + "total_keys": {}, + "txn_retry": {}, + "txn_start_ts": {}, + "unpacked_bytes_received_tiflash_cross_zone": {}, + "unpacked_bytes_received_tiflash_total": {}, + "unpacked_bytes_received_tikv_cross_zone": {}, + "unpacked_bytes_received_tikv_total": {}, + "unpacked_bytes_sent_tiflash_cross_zone": {}, + "unpacked_bytes_sent_tiflash_total": {}, + "unpacked_bytes_sent_tikv_cross_zone": {}, + "unpacked_bytes_sent_tikv_total": {}, + "user": {}, + "user@host": {}, + "wait_prewrite_binlog_time": {}, + "wait_time": {}, + "wait_ts": {}, + "warnings": {}, + "write_keys": {}, + "write_size": {}, + "write_sql_response_total": {}, +} + +var reservedKeyspaceObservabilitySlowLogFieldPrefixes = []string{ + "cop_backoff_", +} + +var reservedKeyspaceObservabilityStmtLogFields = map[string]struct{}{ + "auth_users": {}, + "backoff_types": {}, + "begin": {}, + "binding_digest": {}, + "binding_sql": {}, + "charset": {}, + "collation": {}, + "commit_count": {}, + "digest": {}, + "end": {}, + "exec_count": {}, + "exec_retry_count": {}, + "exec_retry_time": {}, + "first_seen": {}, + "index_names": {}, + "is_internal": {}, + "keyspace_id": {}, + "keyspace_name": {}, + "last_seen": {}, + "max_backoff_time": {}, + "max_commit_backoff_time": {}, + "max_commit_time": {}, + "max_compile_latency": {}, + "max_cop_process_address": {}, + "max_cop_process_time": {}, + "max_cop_wait_address": {}, + "max_cop_wait_time": {}, + "max_disk": {}, + "max_get_commit_ts_time": {}, + "max_latency": {}, + "max_local_latch_time": {}, + "max_mem": {}, + "max_mem_arbitration": {}, + "max_parse_latency": {}, + "max_prewrite_region_num": {}, + "max_prewrite_time": {}, + "max_process_time": {}, + "max_processed_keys": {}, + "max_resolve_lock_time": {}, + "max_result_rows": {}, + "max_rocksdb_block_cache_hit_count": {}, + "max_rocksdb_block_read_byte": {}, + "max_rocksdb_block_read_count": {}, + "max_rocksdb_delete_skipped_count": {}, + "max_rocksdb_key_skipped_count": {}, + "max_rru": {}, + "max_ru_wait_duration": {}, + "max_ruv2": {}, + "max_total_keys": {}, + "max_txn_retry": {}, + "max_wait_time": {}, + "max_write_keys": {}, + "max_write_size": {}, + "max_wru": {}, + "min_latency": {}, + "min_result_rows": {}, + "normalized_sql": {}, + "plan_cache_hits": {}, + "plan_cache_unqualified_count": {}, + "plan_cache_unqualified_last_reason": {}, + "plan_digest": {}, + "plan_hint": {}, + "plan_in_binding": {}, + "plan_in_cache": {}, + "prepared": {}, + "prev_sql": {}, + "resource_group_name": {}, + "sample_binary_plan": {}, + "sample_plan": {}, + "sample_sql": {}, + "schema_name": {}, + "stmt_type": {}, + "storage_kv": {}, + "storage_mpp": {}, + "sum_affected_rows": {}, + "sum_backoff_time": {}, + "sum_backoff_times": {}, + "sum_backoff_total": {}, + "sum_commit_backoff_time": {}, + "sum_commit_time": {}, + "sum_compile_latency": {}, + "sum_disk": {}, + "sum_errors": {}, + "sum_get_commit_ts_time": {}, + "sum_kv_total": {}, + "sum_latency": {}, + "sum_local_latch_time": {}, + "sum_mem": {}, + "sum_mem_arbitration": {}, + "sum_num_cop_tasks": {}, + "sum_parse_latency": {}, + "sum_pd_total": {}, + "sum_prewrite_region_num": {}, + "sum_prewrite_time": {}, + "sum_process_time": {}, + "sum_processed_keys": {}, + "sum_resolve_lock_time": {}, + "sum_result_rows": {}, + "sum_rocksdb_block_cache_hit_count": {}, + "sum_rocksdb_block_read_byte": {}, + "sum_rocksdb_block_read_count": {}, + "sum_rocksdb_delete_skipped_count": {}, + "sum_rocksdb_key_skipped_count": {}, + "sum_rru": {}, + "sum_ru_wait_duration": {}, + "sum_ruv2": {}, + "sum_tidb_cpu": {}, + "sum_tikv_cpu": {}, + "sum_total_keys": {}, + "sum_txn_retry": {}, + "sum_wait_time": {}, + "sum_warnings": {}, + "sum_write_keys": {}, + "sum_write_size": {}, + "sum_write_sql_resp_total": {}, + "sum_wru": {}, + "table_names": {}, + "unpacked_bytes_received_tiflash_cross_zone": {}, + "unpacked_bytes_received_tiflash_total": {}, + "unpacked_bytes_received_tikv_cross_zone": {}, + "unpacked_bytes_received_tikv_total": {}, + "unpacked_bytes_send_tiflash_cross_zone": {}, + "unpacked_bytes_send_tiflash_total": {}, + "unpacked_bytes_send_tikv_cross_zone": {}, + "unpacked_bytes_send_tikv_total": {}, +} + // Valid validates metadata observability mappings. func (o KeyspaceObservability) Valid() error { metricLabels := make(map[string]struct{}, len(o.Fields)) @@ -480,7 +762,7 @@ func (o KeyspaceObservability) Valid() error { return fmt.Errorf("[keyspace-observability.fields.%d] invalid metric-label %q", i, field.MetricLabel) } key := strings.ToLower(field.MetricLabel) - if key == "keyspace_id" || key == "keyspace_name" { + if _, ok := reservedKeyspaceObservabilityMetricLabels[key]; ok { return fmt.Errorf("[keyspace-observability.fields.%d] reserved metric-label %q", i, field.MetricLabel) } if _, ok := metricLabels[key]; ok { @@ -489,21 +771,48 @@ func (o KeyspaceObservability) Valid() error { metricLabels[key] = struct{}{} } if field.SlowLogField != "" { - if _, ok := slowLogFields[field.SlowLogField]; ok { + if !validKeyspaceObservabilityLogFieldName(field.SlowLogField) { + return fmt.Errorf("[keyspace-observability.fields.%d] invalid slow-log-field %q", i, field.SlowLogField) + } + key := strings.ToLower(field.SlowLogField) + if isReservedKeyspaceObservabilitySlowLogField(key) { + return fmt.Errorf("[keyspace-observability.fields.%d] reserved slow-log-field %q", i, field.SlowLogField) + } + if _, ok := slowLogFields[key]; ok { return fmt.Errorf("[keyspace-observability.fields.%d] duplicated slow-log-field %q", i, field.SlowLogField) } - slowLogFields[field.SlowLogField] = struct{}{} + slowLogFields[key] = struct{}{} } if field.StmtLogField != "" { - if _, ok := stmtLogFields[field.StmtLogField]; ok { + key := strings.ToLower(field.StmtLogField) + if _, ok := reservedKeyspaceObservabilityStmtLogFields[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] reserved stmt-log-field %q", i, field.StmtLogField) + } + if _, ok := stmtLogFields[key]; ok { return fmt.Errorf("[keyspace-observability.fields.%d] duplicated stmt-log-field %q", i, field.StmtLogField) } - stmtLogFields[field.StmtLogField] = struct{}{} + stmtLogFields[key] = struct{}{} } } return nil } +func isReservedKeyspaceObservabilitySlowLogField(field string) bool { + if _, ok := reservedKeyspaceObservabilitySlowLogFields[field]; ok { + return true + } + for _, prefix := range reservedKeyspaceObservabilitySlowLogFieldPrefixes { + if strings.HasPrefix(field, prefix) { + return true + } + } + return false +} + +func validKeyspaceObservabilityLogFieldName(field string) bool { + return validPrometheusLabelName(field) +} + func validPrometheusLabelName(label string) bool { for i, r := range label { if i == 0 { @@ -1616,6 +1925,9 @@ func (c *Config) Valid() error { if !kerneltype.IsNextGen() && c.DeployMode != deploymode.Premium { return fmt.Errorf("deploy-mode can only be configured for nextgen TiDB") } + if len(c.KeyspaceObservability.Fields) > 0 && c.DeployMode != deploymode.Starter { + return fmt.Errorf("keyspace-observability.fields can only be configured when deploy-mode is starter") + } if c.DXFResourceLimit < MinDXFResourceLimit || c.DXFResourceLimit > MaxDXFResourceLimit { return fmt.Errorf("dxf-resource-limit should be between %d and %d", MinDXFResourceLimit, MaxDXFResourceLimit) } diff --git a/pkg/config/config.toml.example b/pkg/config/config.toml.example index 0cc634783250d..64b0d551ddde3 100644 --- a/pkg/config/config.toml.example +++ b/pkg/config/config.toml.example @@ -481,6 +481,7 @@ tikv-storage-processed-keys-batch-get = 0.00266791 tikv-storage-processed-keys-get = 0.01416829 # Map selected keyspace metadata entries to observability outputs. +# Only valid when deploy-mode is starter. # [[keyspace-observability.fields]] # source = "meta_key" # metric-label = "metric_label" diff --git a/pkg/config/config.toml.nextgen.example b/pkg/config/config.toml.nextgen.example index 39bb18bf4d7cf..a04d6a19c0c39 100644 --- a/pkg/config/config.toml.nextgen.example +++ b/pkg/config/config.toml.nextgen.example @@ -447,6 +447,7 @@ allow-expression-index = false engines = ["tikv", "tiflash", "tidb"] # Map selected keyspace metadata entries to observability outputs. +# Only valid when deploy-mode is starter. # [[keyspace-observability.fields]] # source = "meta_key" # metric-label = "metric_label" diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index dfc9785c28a4c..7df4783ef41e3 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -186,7 +186,7 @@ metric-label = "label_b" ` _, err := toml.Decode(content, conf) require.NoError(t, err) - require.NoError(t, conf.Valid()) + require.NoError(t, conf.KeyspaceObservability.Valid()) require.NoError(t, conf.ResolveKeyspaceObservability(map[string]string{ "meta_a": "value_a", "meta_b": "value_b", @@ -256,6 +256,33 @@ metric-label = "KEYSPACE_ID" `, err: `reserved metric-label "KEYSPACE_ID"`, }, + { + name: "reserved metric variable label", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "TYPE" +`, + err: `reserved metric-label "TYPE"`, + }, + { + name: "reserved slow log field", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +slow-log-field = "Digest" +`, + err: `reserved slow-log-field "Digest"`, + }, + { + name: "invalid slow log field", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +slow-log-field = "Bad Field" +`, + err: `invalid slow-log-field "Bad Field"`, + }, { name: "duplicate slow log field", content: ` @@ -269,6 +296,15 @@ slow-log-field = "Slow_meta" `, err: `duplicated slow-log-field "Slow_meta"`, }, + { + name: "reserved stmt log field", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +stmt-log-field = "digest" +`, + err: `reserved stmt-log-field "digest"`, + }, { name: "duplicate stmt log field", content: ` @@ -288,9 +324,18 @@ stmt-log-field = "stmt_meta" conf := NewConfig() _, err := toml.Decode(tt.content, conf) require.NoError(t, err) - require.ErrorContains(t, conf.Valid(), tt.err) + require.ErrorContains(t, conf.KeyspaceObservability.Valid(), tt.err) }) } + + conf := NewConfig() + _, err := toml.Decode(` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "label_a" +`, conf) + require.NoError(t, err) + require.ErrorContains(t, conf.Valid(), "keyspace-observability.fields can only be configured when deploy-mode is starter") } func TestRemovedVariableCheck(t *testing.T) { @@ -1235,6 +1280,17 @@ dxf-resource-limit = 101`), 0644)) require.Equal(t, deploymode.Starter, conf.DeployMode) require.NoError(t, conf.Valid()) + require.NoError(t, os.WriteFile(configFile, []byte(`deploy-mode = "starter" + +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "label_a" +`), 0644)) + conf = NewConfig() + require.NoError(t, conf.Load(configFile)) + require.Equal(t, deploymode.Starter, conf.DeployMode) + require.NoError(t, conf.Valid()) + require.NoError(t, os.WriteFile(configFile, []byte(`deploy-mode = "unknown"`), 0644)) conf = NewConfig() require.ErrorContains(t, conf.Load(configFile), `invalid deploy mode "unknown"`) From 80ec6235a035c44ce6d5b1cd80375791b110af67 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 18 May 2026 15:09:01 +0800 Subject: [PATCH 04/18] *: skip pd metrics init when loading keyspace meta --- cmd/tidb-server/main.go | 2 +- pkg/util/metricsutil/common.go | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 62ac3c91c3383..397c1491a3bc1 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -1180,7 +1180,7 @@ func prepareKeyspaceObservability() error { CAPath: cfg.Security.ClusterSSLCA, CertPath: cfg.Security.ClusterSSLCert, KeyPath: cfg.Security.ClusterSSLKey, - }, opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels())) + }, opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels()), opt.WithInitMetricsOption(false)) if err != nil { return err } diff --git a/pkg/util/metricsutil/common.go b/pkg/util/metricsutil/common.go index 4260895273b38..9dc98d6c892fc 100644 --- a/pkg/util/metricsutil/common.go +++ b/pkg/util/metricsutil/common.go @@ -76,9 +76,8 @@ func RegisterMetricsForBR(pdAddrs []string, tls task.TLSConfig, keyspaceName str if tls.IsEnabled() { securityOpt = tls.ToPDSecurityOption() } - // Note: for NextGen, pdCli is created to init the metrics' const labels pdCli, err := pd.NewClient(componentName, pdAddrs, securityOpt, - opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels())) + opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels()), opt.WithInitMetricsOption(false)) if err != nil { return err } From 2b4b179c97a7c2bec442ce1928e8e04ec29d7d3d Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 18 May 2026 15:19:43 +0800 Subject: [PATCH 05/18] *: remove pd metrics labels when loading keyspace meta --- cmd/tidb-server/main.go | 2 +- pkg/util/metricsutil/common.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 397c1491a3bc1..2987b2ef1a7dd 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -1180,7 +1180,7 @@ func prepareKeyspaceObservability() error { CAPath: cfg.Security.ClusterSSLCA, CertPath: cfg.Security.ClusterSSLCert, KeyPath: cfg.Security.ClusterSSLKey, - }, opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels()), opt.WithInitMetricsOption(false)) + }, opt.WithCustomTimeoutOption(timeoutSec), opt.WithInitMetricsOption(false)) if err != nil { return err } diff --git a/pkg/util/metricsutil/common.go b/pkg/util/metricsutil/common.go index 9dc98d6c892fc..628a8ed6a7730 100644 --- a/pkg/util/metricsutil/common.go +++ b/pkg/util/metricsutil/common.go @@ -77,7 +77,7 @@ func RegisterMetricsForBR(pdAddrs []string, tls task.TLSConfig, keyspaceName str securityOpt = tls.ToPDSecurityOption() } pdCli, err := pd.NewClient(componentName, pdAddrs, securityOpt, - opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels()), opt.WithInitMetricsOption(false)) + opt.WithCustomTimeoutOption(timeoutSec), opt.WithInitMetricsOption(false)) if err != nil { return err } From a21d1c4e2486c027dcdf3f6aa7cb9f0edbc8f20c Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 18 May 2026 17:16:04 +0800 Subject: [PATCH 06/18] config, store: fix keyspace metrics labels --- pkg/config/config.go | 38 ++-------------------- pkg/config/config_test.go | 57 ++++++++++++++++++++++++--------- pkg/store/driver/config_test.go | 12 +++++++ pkg/store/driver/tikv_driver.go | 33 ++++++++++++------- 4 files changed, 79 insertions(+), 61 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index d0f761f9c9bf7..bc244dd632e8c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -463,39 +463,7 @@ type KeyspaceObservabilityFieldPair struct { Value string } -var reservedKeyspaceObservabilityMetricLabels = map[string]struct{}{ - "keyspace_id": {}, - "keyspace_name": {}, - "account_lock": {}, - "action": {}, - "address": {}, - "cf": {}, - "cipher": {}, - "copr_type": {}, - "cte_type": {}, - "db": {}, - "event": {}, - "has_lock": {}, - "hash": {}, - "in_txn": {}, - "internal": {}, - "module": {}, - "name": {}, - "phase": {}, - "reason": {}, - "resource_group": {}, - "result": {}, - "scope": {}, - "sql_type": {}, - "stage": {}, - "status": {}, - "step": {}, - "store": {}, - "task": {}, - "txn_mode": {}, - "type": {}, - "version": {}, -} +const keyspaceObservabilityMetricLabelPrefix = "keyspace_meta_" var reservedKeyspaceObservabilitySlowLogFields = map[string]struct{}{ "backoff_detail": {}, @@ -762,8 +730,8 @@ func (o KeyspaceObservability) Valid() error { return fmt.Errorf("[keyspace-observability.fields.%d] invalid metric-label %q", i, field.MetricLabel) } key := strings.ToLower(field.MetricLabel) - if _, ok := reservedKeyspaceObservabilityMetricLabels[key]; ok { - return fmt.Errorf("[keyspace-observability.fields.%d] reserved metric-label %q", i, field.MetricLabel) + if !strings.HasPrefix(key, keyspaceObservabilityMetricLabelPrefix) { + return fmt.Errorf("[keyspace-observability.fields.%d] metric-label %q must start with %q", i, field.MetricLabel, keyspaceObservabilityMetricLabelPrefix) } if _, ok := metricLabels[key]; ok { return fmt.Errorf("[keyspace-observability.fields.%d] duplicated metric-label %q", i, field.MetricLabel) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 7df4783ef41e3..16adb20ccbc0c 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -175,14 +175,14 @@ func TestKeyspaceObservability(t *testing.T) { content := ` [[keyspace-observability.fields]] source = "meta_a" -metric-label = "label_a" +metric-label = "keyspace_meta_label_a" slow-log-field = "Slow_meta_a" stmt-log-field = "stmt_meta_a" required = true [[keyspace-observability.fields]] source = "meta_b" -metric-label = "label_b" +metric-label = "keyspace_meta_label_b" ` _, err := toml.Decode(content, conf) require.NoError(t, err) @@ -191,13 +191,13 @@ metric-label = "label_b" "meta_a": "value_a", "meta_b": "value_b", })) - require.Equal(t, map[string]string{"label_a": "value_a", "label_b": "value_b"}, conf.GetKeyspaceObservabilityMetricLabels()) + require.Equal(t, map[string]string{"keyspace_meta_label_a": "value_a", "keyspace_meta_label_b": "value_b"}, conf.GetKeyspaceObservabilityMetricLabels()) require.Equal(t, []KeyspaceObservabilityFieldPair{{Key: "Slow_meta_a", Value: "value_a"}}, conf.GetKeyspaceObservabilitySlowLogFields()) require.Equal(t, []KeyspaceObservabilityFieldPair{{Key: "stmt_meta_a", Value: "value_a"}}, conf.GetKeyspaceObservabilityStmtLogFields()) metricLabels := conf.GetKeyspaceObservabilityMetricLabels() - metricLabels["label_a"] = "changed" - require.Equal(t, "value_a", conf.GetKeyspaceObservabilityMetricLabels()["label_a"]) + metricLabels["keyspace_meta_label_a"] = "changed" + require.Equal(t, "value_a", conf.GetKeyspaceObservabilityMetricLabels()["keyspace_meta_label_a"]) require.ErrorContains(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_b": "value_b"}), `missing required keyspace metadata entry "meta_a"`) } @@ -213,7 +213,7 @@ func TestKeyspaceObservabilityInvalid(t *testing.T) { content: ` [[keyspace-observability.fields]] source = "" -metric-label = "label_a" +metric-label = "keyspace_meta_label_a" `, err: "source cannot be empty", }, @@ -239,31 +239,58 @@ metric-label = "1_label" content: ` [[keyspace-observability.fields]] source = "meta_a" -metric-label = "label_a" +metric-label = "keyspace_meta_label_a" [[keyspace-observability.fields]] source = "meta_b" -metric-label = "LABEL_A" +metric-label = "KEYSPACE_META_LABEL_A" `, - err: `duplicated metric-label "LABEL_A"`, + err: `duplicated metric-label "KEYSPACE_META_LABEL_A"`, }, { - name: "reserved label", + name: "reserved label without prefix", content: ` [[keyspace-observability.fields]] source = "meta_a" metric-label = "KEYSPACE_ID" `, - err: `reserved metric-label "KEYSPACE_ID"`, + err: `metric-label "KEYSPACE_ID" must start with "keyspace_meta_"`, }, { - name: "reserved metric variable label", + name: "metric variable label without prefix", content: ` [[keyspace-observability.fields]] source = "meta_a" metric-label = "TYPE" `, - err: `reserved metric-label "TYPE"`, + err: `metric-label "TYPE" must start with "keyspace_meta_"`, + }, + { + name: "api label without prefix", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "api" +`, + err: `metric-label "api" must start with "keyspace_meta_"`, + }, + { + name: "service scope label without prefix", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "service_scope" +`, + err: `metric-label "service_scope" must start with "keyspace_meta_"`, + }, + { + name: "task id label without prefix", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "task_id" +`, + err: `metric-label "task_id" must start with "keyspace_meta_"`, }, { name: "reserved slow log field", @@ -332,7 +359,7 @@ stmt-log-field = "stmt_meta" _, err := toml.Decode(` [[keyspace-observability.fields]] source = "meta_a" -metric-label = "label_a" +metric-label = "keyspace_meta_label_a" `, conf) require.NoError(t, err) require.ErrorContains(t, conf.Valid(), "keyspace-observability.fields can only be configured when deploy-mode is starter") @@ -1284,7 +1311,7 @@ dxf-resource-limit = 101`), 0644)) [[keyspace-observability.fields]] source = "meta_a" -metric-label = "label_a" +metric-label = "keyspace_meta_label_a" `), 0644)) conf = NewConfig() require.NoError(t, conf.Load(configFile)) diff --git a/pkg/store/driver/config_test.go b/pkg/store/driver/config_test.go index ab78573ee8a03..07fa0fdbc6633 100644 --- a/pkg/store/driver/config_test.go +++ b/pkg/store/driver/config_test.go @@ -17,8 +17,10 @@ package driver import ( "testing" + metricscommon "github.com/pingcap/tidb/pkg/metrics/common" "github.com/stretchr/testify/require" "github.com/tikv/client-go/v2/config" + "github.com/tikv/pd/client/opt" ) func TestSetDefaultAndOptions(t *testing.T) { @@ -34,4 +36,14 @@ func TestSetDefaultAndOptions(t *testing.T) { require.Equal(t, globalConfig.TxnLocalLatches, d.txnLocalLatches) require.Equal(t, globalConfig.PDClient, d.pdConfig) require.Equal(t, origSecurity, config.GetGlobalConfig().Security) + + metricscommon.SetConstLabels("keyspace_id", "42", "keyspace_name", "ks") + t.Cleanup(func() { + metricscommon.SetConstLabels() + }) + pdOpt := opt.NewOption() + for _, apply := range d.pdClientOptions() { + apply(pdOpt) + } + require.Equal(t, metricscommon.GetConstLabels(), pdOpt.MetricsLabels) } diff --git a/pkg/store/driver/tikv_driver.go b/pkg/store/driver/tikv_driver.go index 9ca2772fca115..d61fa2c2d2200 100644 --- a/pkg/store/driver/tikv_driver.go +++ b/pkg/store/driver/tikv_driver.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/metrics" + metricscommon "github.com/pingcap/tidb/pkg/metrics/common" "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/store/copr" derr "github.com/pingcap/tidb/pkg/store/driver/error" @@ -164,17 +165,7 @@ func (d *TiKVDriver) OpenWithOptions(path string, options ...Option) (resStore k CertPath: d.security.ClusterSSLCert, KeyPath: d.security.ClusterSSLKey, }, - opt.WithGRPCDialOptions( - // keep the same with etcd, see - // https://github.com/etcd-io/etcd/blob/5704c6148d798ea444db26a966394406d8c10526/server/etcdserver/api/v3rpc/grpc.go#L34 - grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), - grpc.WithKeepaliveParams(keepalive.ClientParameters{ - Time: time.Duration(d.tikvConfig.GrpcKeepAliveTime) * time.Second, - Timeout: time.Duration(d.tikvConfig.GrpcKeepAliveTimeout) * time.Second, - }), - ), - opt.WithCustomTimeoutOption(time.Duration(d.pdConfig.PDServerTimeout)*time.Second), - opt.WithForwardingOption(config.GetGlobalConfig().EnableForwarding)) + d.pdClientOptions()...) if err != nil { return nil, errors.Trace(err) } @@ -253,6 +244,26 @@ func (d *TiKVDriver) OpenWithOptions(path string, options ...Option) (resStore k return store, nil } +func (d *TiKVDriver) pdClientOptions() []opt.ClientOption { + opts := []opt.ClientOption{ + opt.WithGRPCDialOptions( + // keep the same with etcd, see + // https://github.com/etcd-io/etcd/blob/5704c6148d798ea444db26a966394406d8c10526/server/etcdserver/api/v3rpc/grpc.go#L34 + grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), + grpc.WithKeepaliveParams(keepalive.ClientParameters{ + Time: time.Duration(d.tikvConfig.GrpcKeepAliveTime) * time.Second, + Timeout: time.Duration(d.tikvConfig.GrpcKeepAliveTimeout) * time.Second, + }), + ), + opt.WithCustomTimeoutOption(time.Duration(d.pdConfig.PDServerTimeout) * time.Second), + opt.WithForwardingOption(config.GetGlobalConfig().EnableForwarding), + } + if labels := metricscommon.GetConstLabels(); len(labels) > 0 { + opts = append(opts, opt.WithMetricsLabels(labels)) + } + return opts +} + type tikvStore struct { *tikv.KVStore etcdAddrs []string From c5f2dfb7153c63c5c80c1abb4579e7e38193a5ff Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 18 May 2026 17:47:52 +0800 Subject: [PATCH 07/18] metricsutil: fix keyspace metrics review comments --- cmd/tidb-server/main_test.go | 4 ++-- pkg/store/driver/BUILD.bazel | 3 +++ pkg/util/metricsutil/common.go | 12 +++++++----- pkg/util/metricsutil/common_test.go | 6 ++++++ 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 053a9ec419843..1d0a072d2ed6b 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -163,7 +163,7 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { conf.KeyspaceObservability = config.KeyspaceObservability{ Fields: []config.KeyspaceObservabilityField{{ Source: "meta_a", - MetricLabel: "label_a", + MetricLabel: "keyspace_meta_label_a", SlowLogField: "Slow_meta_a", StmtLogField: "stmt_meta_a", Required: true, @@ -178,7 +178,7 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { require.NoError(t, err) cfg := config.GetGlobalConfig() - require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks", "label_a": "value_a"}, cfg.GetKeyspaceObservabilityMetricLabels()) + require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks", "keyspace_meta_label_a": "value_a"}, cfg.GetKeyspaceObservabilityMetricLabels()) require.Equal(t, []config.KeyspaceObservabilityFieldPair{{Key: "Slow_meta_a", Value: "value_a"}}, cfg.GetKeyspaceObservabilitySlowLogFields()) require.Equal(t, []config.KeyspaceObservabilityFieldPair{{Key: "stmt_meta_a", Value: "value_a"}}, cfg.GetKeyspaceObservabilityStmtLogFields()) } diff --git a/pkg/store/driver/BUILD.bazel b/pkg/store/driver/BUILD.bazel index 13a574bf66695..d7dd84b06e416 100644 --- a/pkg/store/driver/BUILD.bazel +++ b/pkg/store/driver/BUILD.bazel @@ -8,6 +8,7 @@ go_library( deps = [ "//pkg/kv", "//pkg/metrics", + "//pkg/metrics/common", "//pkg/sessionctx/variable", "//pkg/store/copr", "//pkg/store/driver/error", @@ -49,6 +50,7 @@ go_test( shard_count = 8, deps = [ "//pkg/kv", + "//pkg/metrics/common", "//pkg/session", "//pkg/store/mockstore", "//pkg/testkit", @@ -64,6 +66,7 @@ go_test( "@com_github_tikv_client_go_v2//oracle", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_client_go_v2//tikvrpc", + "@com_github_tikv_pd_client//opt", "@org_uber_go_goleak//:goleak", ], ) diff --git a/pkg/util/metricsutil/common.go b/pkg/util/metricsutil/common.go index 628a8ed6a7730..76c0cb2f0f091 100644 --- a/pkg/util/metricsutil/common.go +++ b/pkg/util/metricsutil/common.go @@ -87,11 +87,7 @@ func RegisterMetricsForBR(pdAddrs []string, tls task.TLSConfig, keyspaceName str if err != nil { return err } - if !kerneltype.IsNextGen() { - labels := cloneConstLabels() - labels[defaultKeyspaceLabel] = fmt.Sprint(keyspaceMeta.GetId()) - metricscommon.SetConstLabelsFromMap(labels) - } + setKeyspaceIDConstLabel(keyspaceMeta.GetId()) return registerMetrics() } @@ -136,6 +132,12 @@ func cloneConstLabels() map[string]string { return labels } +func setKeyspaceIDConstLabel(keyspaceID uint32) { + labels := cloneConstLabels() + labels[defaultKeyspaceLabel] = fmt.Sprint(keyspaceID) + metricscommon.SetConstLabelsFromMap(labels) +} + func getKeyspaceMeta(pdCli pd.Client, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) { // Load Keyspace meta with retry. var keyspaceMeta *keyspacepb.KeyspaceMeta diff --git a/pkg/util/metricsutil/common_test.go b/pkg/util/metricsutil/common_test.go index 96a6d6c727521..a25ae826eeeb3 100644 --- a/pkg/util/metricsutil/common_test.go +++ b/pkg/util/metricsutil/common_test.go @@ -44,4 +44,10 @@ func TestRegisterMetricsWithKeyspaceObservabilityValues(t *testing.T) { labels = metricscommon.GetConstLabels() require.Equal(t, "base_value", labels["base_label"]) require.Equal(t, "value_a", labels["label_a"]) + + metricscommon.SetConstLabels("keyspace_name", "ks") + setKeyspaceIDConstLabel(42) + labels = metricscommon.GetConstLabels() + require.Equal(t, "ks", labels["keyspace_name"]) + require.Equal(t, "42", labels["keyspace_id"]) } From 4cedd93c5551c82239ad5286cd94cf754840da8e Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 19 May 2026 18:34:57 +0800 Subject: [PATCH 08/18] keyspace: cache keyspace meta for logging --- cmd/tidb-server/main.go | 1 + pkg/keyspace/BUILD.bazel | 6 +++++- pkg/keyspace/keyspace.go | 25 ++++++++++++++++++++++- pkg/keyspace/keyspace_test.go | 38 +++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 2 deletions(-) diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 2987b2ef1a7dd..2ed4a719d6174 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -1190,6 +1190,7 @@ func prepareKeyspaceObservability() error { if err != nil { return err } + keyspace.SetKeyspaceMeta(keyspaceMeta) return prepareKeyspaceObservabilityWithKeyspaceMeta(keyspaceMeta, cfg.KeyspaceName, deploymode.IsStarter()) } diff --git a/pkg/keyspace/BUILD.bazel b/pkg/keyspace/BUILD.bazel index 72ad3ccff1a6c..65243270ef1c1 100644 --- a/pkg/keyspace/BUILD.bazel +++ b/pkg/keyspace/BUILD.bazel @@ -11,6 +11,7 @@ go_library( deps = [ "//pkg/config", "//pkg/config/kerneltype", + "@com_github_pingcap_kvproto//pkg/keyspacepb", "@com_github_pingcap_kvproto//pkg/kvrpcpb", "@com_github_tikv_client_go_v2//tikv", "@org_uber_go_zap//:zap", @@ -24,10 +25,13 @@ go_test( srcs = ["keyspace_test.go"], embed = [":keyspace"], flaky = True, - shard_count = 2, + shard_count = 3, deps = [ "//pkg/config", "//pkg/config/kerneltype", + "@com_github_pingcap_kvproto//pkg/keyspacepb", "@com_github_stretchr_testify//require", + "@org_uber_go_zap//:zap", + "@org_uber_go_zap//zaptest/observer", ], ) diff --git a/pkg/keyspace/keyspace.go b/pkg/keyspace/keyspace.go index 86eba95040998..1207bd30c6fab 100644 --- a/pkg/keyspace/keyspace.go +++ b/pkg/keyspace/keyspace.go @@ -18,6 +18,7 @@ import ( "fmt" "sync" + "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/config/kerneltype" @@ -61,6 +62,7 @@ func GetKeyspaceNameBySettings() (keyspaceName string) { var keyspaceNameBytes []byte var genKeyspaceNameOnce sync.Once +var keyspaceMeta sync.Map // GetKeyspaceNameBytesBySettings is used to get keyspace name setting as a byte slice. func GetKeyspaceNameBytesBySettings() []byte { @@ -80,12 +82,33 @@ func IsKeyspaceNameEmpty(keyspaceName string) bool { return keyspaceName == "" } +// SetKeyspaceMeta stores keyspace metadata loaded at startup for later reuse. +func SetKeyspaceMeta(meta *keyspacepb.KeyspaceMeta) { + if meta == nil || IsKeyspaceNameEmpty(meta.GetName()) { + return + } + keyspaceMeta.Store(meta.GetName(), meta) +} + +// GetKeyspaceMeta returns keyspace metadata loaded at startup. +func GetKeyspaceMeta(keyspaceName string) (*keyspacepb.KeyspaceMeta, bool) { + meta, ok := keyspaceMeta.Load(keyspaceName) + if !ok { + return nil, false + } + return meta.(*keyspacepb.KeyspaceMeta), true +} + // WrapZapcoreWithKeyspace is used to wrap zapcore.Core. func WrapZapcoreWithKeyspace() zap.Option { return zap.WrapCore(func(core zapcore.Core) zapcore.Core { keyspaceName := GetKeyspaceNameBySettings() if !IsKeyspaceNameEmpty(keyspaceName) { - core = core.With([]zap.Field{zap.String("keyspaceName", keyspaceName)}) + fields := []zap.Field{zap.String("keyspaceName", keyspaceName)} + if meta, ok := GetKeyspaceMeta(keyspaceName); ok { + fields = append(fields, zap.Uint32("keyspaceID", meta.GetId())) + } + core = core.With(fields) } return core }) diff --git a/pkg/keyspace/keyspace_test.go b/pkg/keyspace/keyspace_test.go index 50e64bd4b032d..b6a205b6cd45d 100644 --- a/pkg/keyspace/keyspace_test.go +++ b/pkg/keyspace/keyspace_test.go @@ -18,9 +18,12 @@ import ( "sync" "testing" + "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/config/kerneltype" "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zaptest/observer" ) func TestSetKeyspaceNameInConf(t *testing.T) { @@ -71,6 +74,41 @@ func TestNoKeyspaceNameSet(t *testing.T) { } } +func TestSetKeyspaceMeta(t *testing.T) { + restore := config.RestoreFunc() + defer restore() + t.Cleanup(func() { + keyspaceMeta.Delete("test_keyspace") + }) + + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceName = "" + }) + + meta := &keyspacepb.KeyspaceMeta{Id: 42, Name: "test_keyspace"} + SetKeyspaceMeta(meta) + + got, ok := GetKeyspaceMeta("test_keyspace") + require.True(t, ok) + require.Equal(t, uint32(42), got.GetId()) + require.Equal(t, "test_keyspace", got.GetName()) + + SetKeyspaceMeta(nil) + _, ok = GetKeyspaceMeta("") + require.False(t, ok) + + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceName = "test_keyspace" + }) + core, logs := observer.New(zap.InfoLevel) + logger := zap.New(core, WrapZapcoreWithKeyspace()) + logger.Info("test") + entries := logs.All() + require.Len(t, entries, 1) + require.Equal(t, "test_keyspace", entries[0].ContextMap()["keyspaceName"]) + require.Equal(t, uint32(42), entries[0].ContextMap()["keyspaceID"]) +} + func BenchmarkGetKeyspaceNameBytesBySettings(b *testing.B) { if !kerneltype.IsNextGen() { b.Skip("NextGen is not enabled, skipping benchmark") From fc9ba61c39fee887a8a21ec3c75cc23f6066ee1f Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 25 May 2026 18:10:35 +0800 Subject: [PATCH 09/18] *: address keyspace observability review comments --- cmd/tidb-server/BUILD.bazel | 6 - cmd/tidb-server/main.go | 74 +---- cmd/tidb-server/main_test.go | 15 +- pkg/config/BUILD.bazel | 1 + pkg/config/config.go | 417 ------------------------- pkg/config/keyspace_observability.go | 437 +++++++++++++++++++++++++++ pkg/keyspace/BUILD.bazel | 6 +- pkg/keyspace/keyspace.go | 25 +- pkg/keyspace/keyspace_test.go | 38 --- pkg/standby/BUILD.bazel | 11 +- pkg/standby/standby.go | 26 +- pkg/standby/standby_test.go | 53 ++++ pkg/util/metricsutil/common.go | 2 +- 13 files changed, 550 insertions(+), 561 deletions(-) create mode 100644 pkg/config/keyspace_observability.go create mode 100644 pkg/standby/standby_test.go diff --git a/cmd/tidb-server/BUILD.bazel b/cmd/tidb-server/BUILD.bazel index d163d7a3981cf..a52041c6a4a35 100644 --- a/cmd/tidb-server/BUILD.bazel +++ b/cmd/tidb-server/BUILD.bazel @@ -68,16 +68,11 @@ go_library( "@com_github_opentracing_opentracing_go//:opentracing-go", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", - "@com_github_pingcap_kvproto//pkg/keyspacepb", "@com_github_pingcap_log//:log", "@com_github_prometheus_client_golang//prometheus", "@com_github_prometheus_client_golang//prometheus/push", - "@com_github_tikv_client_go_v2//config", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_client_go_v2//txnkv/transaction", - "@com_github_tikv_pd_client//:client", - "@com_github_tikv_pd_client//opt", - "@com_github_tikv_pd_client//pkg/caller", "@org_uber_go_automaxprocs//maxprocs", "@org_uber_go_zap//:zap", ], @@ -122,7 +117,6 @@ go_test( "//pkg/sessionctx/vardef", "//pkg/sessionctx/variable", "//pkg/testkit/testsetup", - "@com_github_pingcap_kvproto//pkg/keyspacepb", "@com_github_stretchr_testify//require", "@io_opencensus_go//stats/view", "@org_uber_go_goleak//:goleak", diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 2ed4a719d6174..0a2cfddd9d54b 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -19,7 +19,6 @@ import ( "flag" "fmt" "io/fs" - "maps" "os" "runtime" "strconv" @@ -31,7 +30,6 @@ import ( "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" "github.com/pingcap/failpoint" - "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/log" "github.com/pingcap/tidb/pkg/bindinfo" "github.com/pingcap/tidb/pkg/config" @@ -93,12 +91,8 @@ import ( repository "github.com/pingcap/tidb/pkg/util/workloadrepo" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/push" - tikvconfig "github.com/tikv/client-go/v2/config" "github.com/tikv/client-go/v2/tikv" "github.com/tikv/client-go/v2/txnkv/transaction" - pd "github.com/tikv/pd/client" - "github.com/tikv/pd/client/opt" - "github.com/tikv/pd/client/pkg/caller" "go.uber.org/automaxprocs/maxprocs" "go.uber.org/zap" ) @@ -321,6 +315,7 @@ func main() { } var standbyController server.StandbyController + var activationMetadata map[string]string if config.GetGlobalConfig().Standby.StandByMode { standbyController = standby.NewLoadKeyspaceController() } @@ -337,12 +332,15 @@ func main() { defer standbyController.EndStandby(err) // need to validate config again in case of config change via standby terror.MustNil(config.GetGlobalConfig().Valid()) + if c, ok := standbyController.(*standby.LoadKeyspaceController); ok { + activationMetadata = c.ActivationMetadata() + } } signal.SetupUSR1Handler() err = registerStores() terror.MustNil(err) - err = prepareKeyspaceObservability() + err = prepareKeyspaceObservability(activationMetadata) terror.MustNil(err) err = metricsutil.RegisterMetrics() terror.MustNil(err) @@ -1155,78 +1153,38 @@ func closeStmtSummary() { } } -var keyspaceMetaComponentName = caller.Component("tidb-keyspace-meta") - const ( keyspaceIDMetricLabel = "keyspace_id" keyspaceNameMetricLabel = "keyspace_name" ) -func prepareKeyspaceObservability() error { +func prepareKeyspaceObservability(metadata map[string]string) error { cfg := config.GetGlobalConfig() - if !kerneltype.IsNextGen() { - return nil - } - if keyspace.IsKeyspaceNameEmpty(cfg.KeyspaceName) || cfg.Store != config.StoreTypeTiKV { + if !kerneltype.IsNextGen() || cfg.Store != config.StoreTypeTiKV { return nil } metricscommon.SetConstLabels(keyspaceNameMetricLabel, cfg.KeyspaceName) - pdAddrs, _, _, err := tikvconfig.ParsePath("tikv://" + cfg.Path) - if err != nil { - return err - } - timeoutSec := time.Duration(cfg.PDClient.PDServerTimeout) * time.Second - pdCli, err := pd.NewClient(keyspaceMetaComponentName, pdAddrs, pd.SecurityOption{ - CAPath: cfg.Security.ClusterSSLCA, - CertPath: cfg.Security.ClusterSSLCert, - KeyPath: cfg.Security.ClusterSSLKey, - }, opt.WithCustomTimeoutOption(timeoutSec), opt.WithInitMetricsOption(false)) - if err != nil { - return err - } - defer pdCli.Close() - - keyspaceMeta, err := getKeyspaceMeta(pdCli, cfg.KeyspaceName) - if err != nil { - return err - } - keyspace.SetKeyspaceMeta(keyspaceMeta) - return prepareKeyspaceObservabilityWithKeyspaceMeta(keyspaceMeta, cfg.KeyspaceName, deploymode.IsStarter()) -} - -func getKeyspaceMeta(pdCli pd.Client, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) { - var keyspaceMeta *keyspacepb.KeyspaceMeta - err := util.RunWithRetry(util.DefaultMaxRetries, util.RetryInterval, func() (bool, error) { - var errInner error - keyspaceMeta, errInner = pdCli.LoadKeyspace(context.TODO(), keyspaceName) - if kvstore.IsNotBootstrappedError(errInner) || kvstore.IsKeyspaceNotExistError(errInner) { - return true, errInner - } - return false, errInner - }) - if err != nil { - return nil, err - } - return keyspaceMeta, nil + return prepareKeyspaceObservabilityWithMetadata(metadata, cfg.KeyspaceName, deploymode.IsStarter()) } -func prepareKeyspaceObservabilityWithKeyspaceMeta(keyspaceMeta *keyspacepb.KeyspaceMeta, keyspaceName string, includeConfiguredFields bool) error { - if keyspaceMeta == nil { - return nil - } +func prepareKeyspaceObservabilityWithMetadata(metadata map[string]string, keyspaceName string, includeConfiguredFields bool) error { resolvedValues := config.KeyspaceObservabilityValues{ MetricLabels: map[string]string{ - keyspaceIDMetricLabel: fmt.Sprint(keyspaceMeta.GetId()), keyspaceNameMetricLabel: keyspaceName, }, } + if keyspaceID, ok := metadata[keyspaceIDMetricLabel]; ok { + resolvedValues.MetricLabels[keyspaceIDMetricLabel] = keyspaceID + } if includeConfiguredFields { copiedConfig := *config.GetGlobalConfig() - if err := copiedConfig.ResolveKeyspaceObservability(keyspaceMeta.GetConfig()); err != nil { + if err := copiedConfig.ResolveKeyspaceObservability(metadata); err != nil { return err } configuredValues := copiedConfig.KeyspaceObservabilityValues.Clone() - maps.Copy(resolvedValues.MetricLabels, configuredValues.MetricLabels) + for k, v := range configuredValues.MetricLabels { + resolvedValues.MetricLabels[k] = v + } resolvedValues.SlowLogFields = configuredValues.SlowLogFields resolvedValues.StmtLogFields = configuredValues.StmtLogFields } diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 1d0a072d2ed6b..90bdb6ce0e144 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -18,7 +18,6 @@ import ( "os" "testing" - "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/config/deploymode" "github.com/pingcap/tidb/pkg/config/kerneltype" @@ -171,9 +170,9 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { } }) - err := prepareKeyspaceObservabilityWithKeyspaceMeta(&keyspacepb.KeyspaceMeta{ - Id: 42, - Config: map[string]string{"meta_a": "value_a"}, + err := prepareKeyspaceObservabilityWithMetadata(map[string]string{ + keyspaceIDMetricLabel: "42", + "meta_a": "value_a", }, "ks", true) require.NoError(t, err) @@ -187,9 +186,9 @@ func TestSetupKeyspaceObservabilityForNonStarter(t *testing.T) { restore := config.RestoreFunc() defer restore() - err := prepareKeyspaceObservabilityWithKeyspaceMeta(&keyspacepb.KeyspaceMeta{ - Id: 42, - Config: map[string]string{"meta_a": "value_a"}, + err := prepareKeyspaceObservabilityWithMetadata(map[string]string{ + keyspaceIDMetricLabel: "42", + "meta_a": "value_a", }, "ks", false) require.NoError(t, err) @@ -212,6 +211,6 @@ func TestSetupKeyspaceObservabilityForStartSkipsClassic(t *testing.T) { conf.KeyspaceName = "test_keyspace" }) - require.NoError(t, prepareKeyspaceObservability()) + require.NoError(t, prepareKeyspaceObservability(nil)) require.Empty(t, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) } diff --git a/pkg/config/BUILD.bazel b/pkg/config/BUILD.bazel index 5ab743a7d063b..0f185041d8813 100644 --- a/pkg/config/BUILD.bazel +++ b/pkg/config/BUILD.bazel @@ -6,6 +6,7 @@ go_library( "config.go", "config_util.go", "const.go", + "keyspace_observability.go", "store.go", "tiflash.go", ], diff --git a/pkg/config/config.go b/pkg/config/config.go index bc244dd632e8c..f999841efc166 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -436,423 +436,6 @@ func encodeDefTempStorageDir(tempDir string, host, statusHost string, port, stat return filepath.Join(tempDir, osUID+"_tidb", dirName, "tmp-storage") } -// KeyspaceObservability maps metadata entries to observability outputs. -type KeyspaceObservability struct { - Fields []KeyspaceObservabilityField `toml:"fields" json:"fields"` -} - -// KeyspaceObservabilityField describes one metadata entry mapping. -type KeyspaceObservabilityField struct { - Source string `toml:"source" json:"source"` - MetricLabel string `toml:"metric-label" json:"metric-label,omitempty"` - SlowLogField string `toml:"slow-log-field" json:"slow-log-field,omitempty"` - StmtLogField string `toml:"stmt-log-field" json:"stmt-log-field,omitempty"` - Required bool `toml:"required" json:"required"` -} - -// KeyspaceObservabilityValues stores resolved metadata values. -type KeyspaceObservabilityValues struct { - MetricLabels map[string]string `toml:"-" json:"-"` - SlowLogFields []KeyspaceObservabilityFieldPair `toml:"-" json:"-"` - StmtLogFields []KeyspaceObservabilityFieldPair `toml:"-" json:"-"` -} - -// KeyspaceObservabilityFieldPair stores one resolved output field. -type KeyspaceObservabilityFieldPair struct { - Key string - Value string -} - -const keyspaceObservabilityMetricLabelPrefix = "keyspace_meta_" - -var reservedKeyspaceObservabilitySlowLogFields = map[string]struct{}{ - "backoff_detail": {}, - "backoff_time": {}, - "backoff_total": {}, - "backoff_types": {}, - "binary_plan": {}, - "commit_backoff_time": {}, - "commit_primary_rpc_detail": {}, - "commit_time": {}, - "compile_time": {}, - "conn_id": {}, - "cop_backoff_": {}, - "cop_mvcc_read_amplification": {}, - "cop_proc_addr": {}, - "cop_proc_avg": {}, - "cop_proc_max": {}, - "cop_proc_p90": {}, - "cop_time": {}, - "cop_wait_addr": {}, - "cop_wait_avg": {}, - "cop_wait_max": {}, - "cop_wait_p90": {}, - "db": {}, - "digest": {}, - "disk_max": {}, - "exec_retry_count": {}, - "exec_retry_time": {}, - "get_commit_ts_time": {}, - "get_latest_ts_time": {}, - "get_snapshot_time": {}, - "has_more_results": {}, - "host": {}, - "index_names": {}, - "is_internal": {}, - "isexplicittxn": {}, - "issyncstatsfailed": {}, - "iswritecachetable": {}, - "keyspace_id": {}, - "keyspace_name": {}, - "kv_total": {}, - "local_latch_wait_time": {}, - "lockkeys_time": {}, - "mem_arbitration": {}, - "mem_max": {}, - "num_cop_tasks": {}, - "opt_binding_match": {}, - "opt_logical": {}, - "opt_physical": {}, - "opt_stats_derive": {}, - "opt_stats_sync_wait": {}, - "optimize_time": {}, - "parse_time": {}, - "pd_total": {}, - "plan": {}, - "plan_digest": {}, - "plan_from_binding": {}, - "plan_from_cache": {}, - "preproc_subqueries": {}, - "preproc_subqueries_time": {}, - "prepared": {}, - "prewrite_backoff_types": {}, - "prewrite_region": {}, - "prewrite_time": {}, - "prev_stmt": {}, - "process_keys": {}, - "process_time": {}, - "query": {}, - "query_time": {}, - "request_count": {}, - "request_unit_read": {}, - "request_unit_v2": {}, - "request_unit_v2_detail": {}, - "request_unit_write": {}, - "resolve_lock_time": {}, - "resource_group": {}, - "result_rows": {}, - "rewrite_time": {}, - "rocksdb_block_cache_hit_count": {}, - "rocksdb_block_read_byte": {}, - "rocksdb_block_read_count": {}, - "rocksdb_block_read_time": {}, - "rocksdb_delete_skipped_count": {}, - "rocksdb_key_skipped_count": {}, - "session_alias": {}, - "session_connect_attrs": {}, - "slowest_prewrite_rpc_detail": {}, - "stats": {}, - "storage_from_kv": {}, - "storage_from_mpp": {}, - "succ": {}, - "tidb_cpu_time": {}, - "tikv_cpu_time": {}, - "time": {}, - "time_queued_by_rc": {}, - "total_keys": {}, - "txn_retry": {}, - "txn_start_ts": {}, - "unpacked_bytes_received_tiflash_cross_zone": {}, - "unpacked_bytes_received_tiflash_total": {}, - "unpacked_bytes_received_tikv_cross_zone": {}, - "unpacked_bytes_received_tikv_total": {}, - "unpacked_bytes_sent_tiflash_cross_zone": {}, - "unpacked_bytes_sent_tiflash_total": {}, - "unpacked_bytes_sent_tikv_cross_zone": {}, - "unpacked_bytes_sent_tikv_total": {}, - "user": {}, - "user@host": {}, - "wait_prewrite_binlog_time": {}, - "wait_time": {}, - "wait_ts": {}, - "warnings": {}, - "write_keys": {}, - "write_size": {}, - "write_sql_response_total": {}, -} - -var reservedKeyspaceObservabilitySlowLogFieldPrefixes = []string{ - "cop_backoff_", -} - -var reservedKeyspaceObservabilityStmtLogFields = map[string]struct{}{ - "auth_users": {}, - "backoff_types": {}, - "begin": {}, - "binding_digest": {}, - "binding_sql": {}, - "charset": {}, - "collation": {}, - "commit_count": {}, - "digest": {}, - "end": {}, - "exec_count": {}, - "exec_retry_count": {}, - "exec_retry_time": {}, - "first_seen": {}, - "index_names": {}, - "is_internal": {}, - "keyspace_id": {}, - "keyspace_name": {}, - "last_seen": {}, - "max_backoff_time": {}, - "max_commit_backoff_time": {}, - "max_commit_time": {}, - "max_compile_latency": {}, - "max_cop_process_address": {}, - "max_cop_process_time": {}, - "max_cop_wait_address": {}, - "max_cop_wait_time": {}, - "max_disk": {}, - "max_get_commit_ts_time": {}, - "max_latency": {}, - "max_local_latch_time": {}, - "max_mem": {}, - "max_mem_arbitration": {}, - "max_parse_latency": {}, - "max_prewrite_region_num": {}, - "max_prewrite_time": {}, - "max_process_time": {}, - "max_processed_keys": {}, - "max_resolve_lock_time": {}, - "max_result_rows": {}, - "max_rocksdb_block_cache_hit_count": {}, - "max_rocksdb_block_read_byte": {}, - "max_rocksdb_block_read_count": {}, - "max_rocksdb_delete_skipped_count": {}, - "max_rocksdb_key_skipped_count": {}, - "max_rru": {}, - "max_ru_wait_duration": {}, - "max_ruv2": {}, - "max_total_keys": {}, - "max_txn_retry": {}, - "max_wait_time": {}, - "max_write_keys": {}, - "max_write_size": {}, - "max_wru": {}, - "min_latency": {}, - "min_result_rows": {}, - "normalized_sql": {}, - "plan_cache_hits": {}, - "plan_cache_unqualified_count": {}, - "plan_cache_unqualified_last_reason": {}, - "plan_digest": {}, - "plan_hint": {}, - "plan_in_binding": {}, - "plan_in_cache": {}, - "prepared": {}, - "prev_sql": {}, - "resource_group_name": {}, - "sample_binary_plan": {}, - "sample_plan": {}, - "sample_sql": {}, - "schema_name": {}, - "stmt_type": {}, - "storage_kv": {}, - "storage_mpp": {}, - "sum_affected_rows": {}, - "sum_backoff_time": {}, - "sum_backoff_times": {}, - "sum_backoff_total": {}, - "sum_commit_backoff_time": {}, - "sum_commit_time": {}, - "sum_compile_latency": {}, - "sum_disk": {}, - "sum_errors": {}, - "sum_get_commit_ts_time": {}, - "sum_kv_total": {}, - "sum_latency": {}, - "sum_local_latch_time": {}, - "sum_mem": {}, - "sum_mem_arbitration": {}, - "sum_num_cop_tasks": {}, - "sum_parse_latency": {}, - "sum_pd_total": {}, - "sum_prewrite_region_num": {}, - "sum_prewrite_time": {}, - "sum_process_time": {}, - "sum_processed_keys": {}, - "sum_resolve_lock_time": {}, - "sum_result_rows": {}, - "sum_rocksdb_block_cache_hit_count": {}, - "sum_rocksdb_block_read_byte": {}, - "sum_rocksdb_block_read_count": {}, - "sum_rocksdb_delete_skipped_count": {}, - "sum_rocksdb_key_skipped_count": {}, - "sum_rru": {}, - "sum_ru_wait_duration": {}, - "sum_ruv2": {}, - "sum_tidb_cpu": {}, - "sum_tikv_cpu": {}, - "sum_total_keys": {}, - "sum_txn_retry": {}, - "sum_wait_time": {}, - "sum_warnings": {}, - "sum_write_keys": {}, - "sum_write_size": {}, - "sum_write_sql_resp_total": {}, - "sum_wru": {}, - "table_names": {}, - "unpacked_bytes_received_tiflash_cross_zone": {}, - "unpacked_bytes_received_tiflash_total": {}, - "unpacked_bytes_received_tikv_cross_zone": {}, - "unpacked_bytes_received_tikv_total": {}, - "unpacked_bytes_send_tiflash_cross_zone": {}, - "unpacked_bytes_send_tiflash_total": {}, - "unpacked_bytes_send_tikv_cross_zone": {}, - "unpacked_bytes_send_tikv_total": {}, -} - -// Valid validates metadata observability mappings. -func (o KeyspaceObservability) Valid() error { - metricLabels := make(map[string]struct{}, len(o.Fields)) - slowLogFields := make(map[string]struct{}, len(o.Fields)) - stmtLogFields := make(map[string]struct{}, len(o.Fields)) - for i, field := range o.Fields { - if field.Source == "" { - return fmt.Errorf("[keyspace-observability.fields.%d] source cannot be empty", i) - } - if field.MetricLabel == "" && field.SlowLogField == "" && field.StmtLogField == "" { - return fmt.Errorf("[keyspace-observability.fields.%d] at least one output must be set", i) - } - if field.MetricLabel != "" { - if !validPrometheusLabelName(field.MetricLabel) { - return fmt.Errorf("[keyspace-observability.fields.%d] invalid metric-label %q", i, field.MetricLabel) - } - key := strings.ToLower(field.MetricLabel) - if !strings.HasPrefix(key, keyspaceObservabilityMetricLabelPrefix) { - return fmt.Errorf("[keyspace-observability.fields.%d] metric-label %q must start with %q", i, field.MetricLabel, keyspaceObservabilityMetricLabelPrefix) - } - if _, ok := metricLabels[key]; ok { - return fmt.Errorf("[keyspace-observability.fields.%d] duplicated metric-label %q", i, field.MetricLabel) - } - metricLabels[key] = struct{}{} - } - if field.SlowLogField != "" { - if !validKeyspaceObservabilityLogFieldName(field.SlowLogField) { - return fmt.Errorf("[keyspace-observability.fields.%d] invalid slow-log-field %q", i, field.SlowLogField) - } - key := strings.ToLower(field.SlowLogField) - if isReservedKeyspaceObservabilitySlowLogField(key) { - return fmt.Errorf("[keyspace-observability.fields.%d] reserved slow-log-field %q", i, field.SlowLogField) - } - if _, ok := slowLogFields[key]; ok { - return fmt.Errorf("[keyspace-observability.fields.%d] duplicated slow-log-field %q", i, field.SlowLogField) - } - slowLogFields[key] = struct{}{} - } - if field.StmtLogField != "" { - key := strings.ToLower(field.StmtLogField) - if _, ok := reservedKeyspaceObservabilityStmtLogFields[key]; ok { - return fmt.Errorf("[keyspace-observability.fields.%d] reserved stmt-log-field %q", i, field.StmtLogField) - } - if _, ok := stmtLogFields[key]; ok { - return fmt.Errorf("[keyspace-observability.fields.%d] duplicated stmt-log-field %q", i, field.StmtLogField) - } - stmtLogFields[key] = struct{}{} - } - } - return nil -} - -func isReservedKeyspaceObservabilitySlowLogField(field string) bool { - if _, ok := reservedKeyspaceObservabilitySlowLogFields[field]; ok { - return true - } - for _, prefix := range reservedKeyspaceObservabilitySlowLogFieldPrefixes { - if strings.HasPrefix(field, prefix) { - return true - } - } - return false -} - -func validKeyspaceObservabilityLogFieldName(field string) bool { - return validPrometheusLabelName(field) -} - -func validPrometheusLabelName(label string) bool { - for i, r := range label { - if i == 0 { - if r == '_' || r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' { - continue - } - return false - } - if r == '_' || r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' || r >= '0' && r <= '9' { - continue - } - return false - } - return label != "" -} - -// ResolveKeyspaceObservability resolves configured output values from metadata. -func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { - resolved := KeyspaceObservabilityValues{ - MetricLabels: make(map[string]string), - } - for _, field := range c.KeyspaceObservability.Fields { - value, ok := values[field.Source] - if !ok { - if field.Required { - return fmt.Errorf("missing required keyspace metadata entry %q", field.Source) - } - continue - } - if field.MetricLabel != "" { - resolved.MetricLabels[field.MetricLabel] = value - } - if field.SlowLogField != "" { - resolved.SlowLogFields = append(resolved.SlowLogFields, KeyspaceObservabilityFieldPair{Key: field.SlowLogField, Value: value}) - } - if field.StmtLogField != "" { - resolved.StmtLogFields = append(resolved.StmtLogFields, KeyspaceObservabilityFieldPair{Key: field.StmtLogField, Value: value}) - } - } - c.KeyspaceObservabilityValues = resolved.Clone() - return nil -} - -// Clone returns a deep copy of resolved metadata observability values. -func (v KeyspaceObservabilityValues) Clone() KeyspaceObservabilityValues { - res := KeyspaceObservabilityValues{} - if len(v.MetricLabels) > 0 { - res.MetricLabels = make(map[string]string, len(v.MetricLabels)) - for k, value := range v.MetricLabels { - res.MetricLabels[k] = value - } - } - res.SlowLogFields = append([]KeyspaceObservabilityFieldPair(nil), v.SlowLogFields...) - res.StmtLogFields = append([]KeyspaceObservabilityFieldPair(nil), v.StmtLogFields...) - return res -} - -// GetKeyspaceObservabilityMetricLabels returns resolved metric labels. -func (c *Config) GetKeyspaceObservabilityMetricLabels() map[string]string { - return c.KeyspaceObservabilityValues.Clone().MetricLabels -} - -// GetKeyspaceObservabilitySlowLogFields returns resolved slow log fields. -func (c *Config) GetKeyspaceObservabilitySlowLogFields() []KeyspaceObservabilityFieldPair { - return c.KeyspaceObservabilityValues.Clone().SlowLogFields -} - -// GetKeyspaceObservabilityStmtLogFields returns resolved statement log fields. -func (c *Config) GetKeyspaceObservabilityStmtLogFields() []KeyspaceObservabilityFieldPair { - return c.KeyspaceObservabilityValues.Clone().StmtLogFields -} - // nullableBool defaults unset bool options to unset instead of false, which enables us to know if the user has set 2 // conflict options at the same time. type nullableBool struct { diff --git a/pkg/config/keyspace_observability.go b/pkg/config/keyspace_observability.go new file mode 100644 index 0000000000000..ba6da947de5c0 --- /dev/null +++ b/pkg/config/keyspace_observability.go @@ -0,0 +1,437 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "fmt" + "strings" +) + +// KeyspaceObservability maps metadata entries to observability outputs. +type KeyspaceObservability struct { + Fields []KeyspaceObservabilityField `toml:"fields" json:"fields"` +} + +// KeyspaceObservabilityField describes one metadata entry mapping. +type KeyspaceObservabilityField struct { + Source string `toml:"source" json:"source"` + MetricLabel string `toml:"metric-label" json:"metric-label,omitempty"` + SlowLogField string `toml:"slow-log-field" json:"slow-log-field,omitempty"` + StmtLogField string `toml:"stmt-log-field" json:"stmt-log-field,omitempty"` + Required bool `toml:"required" json:"required"` +} + +// KeyspaceObservabilityValues stores resolved metadata values. +type KeyspaceObservabilityValues struct { + MetricLabels map[string]string `toml:"-" json:"-"` + SlowLogFields []KeyspaceObservabilityFieldPair `toml:"-" json:"-"` + StmtLogFields []KeyspaceObservabilityFieldPair `toml:"-" json:"-"` +} + +// KeyspaceObservabilityFieldPair stores one resolved output field. +type KeyspaceObservabilityFieldPair struct { + Key string + Value string +} + +const keyspaceObservabilityMetricLabelPrefix = "keyspace_meta_" + +var reservedKeyspaceObservabilitySlowLogFields = map[string]struct{}{ + "backoff_detail": {}, + "backoff_time": {}, + "backoff_total": {}, + "backoff_types": {}, + "binary_plan": {}, + "commit_backoff_time": {}, + "commit_primary_rpc_detail": {}, + "commit_time": {}, + "compile_time": {}, + "conn_id": {}, + "cop_backoff_": {}, + "cop_mvcc_read_amplification": {}, + "cop_proc_addr": {}, + "cop_proc_avg": {}, + "cop_proc_max": {}, + "cop_proc_p90": {}, + "cop_time": {}, + "cop_wait_addr": {}, + "cop_wait_avg": {}, + "cop_wait_max": {}, + "cop_wait_p90": {}, + "db": {}, + "digest": {}, + "disk_max": {}, + "exec_retry_count": {}, + "exec_retry_time": {}, + "get_commit_ts_time": {}, + "get_latest_ts_time": {}, + "get_snapshot_time": {}, + "has_more_results": {}, + "host": {}, + "index_names": {}, + "is_internal": {}, + "isexplicittxn": {}, + "issyncstatsfailed": {}, + "iswritecachetable": {}, + "keyspace_id": {}, + "keyspace_name": {}, + "kv_total": {}, + "local_latch_wait_time": {}, + "lockkeys_time": {}, + "mem_arbitration": {}, + "mem_max": {}, + "num_cop_tasks": {}, + "opt_binding_match": {}, + "opt_logical": {}, + "opt_physical": {}, + "opt_stats_derive": {}, + "opt_stats_sync_wait": {}, + "optimize_time": {}, + "parse_time": {}, + "pd_total": {}, + "plan": {}, + "plan_digest": {}, + "plan_from_binding": {}, + "plan_from_cache": {}, + "preproc_subqueries": {}, + "preproc_subqueries_time": {}, + "prepared": {}, + "prewrite_backoff_types": {}, + "prewrite_region": {}, + "prewrite_time": {}, + "prev_stmt": {}, + "process_keys": {}, + "process_time": {}, + "query": {}, + "query_time": {}, + "request_count": {}, + "request_unit_read": {}, + "request_unit_v2": {}, + "request_unit_v2_detail": {}, + "request_unit_write": {}, + "resolve_lock_time": {}, + "resource_group": {}, + "result_rows": {}, + "rewrite_time": {}, + "rocksdb_block_cache_hit_count": {}, + "rocksdb_block_read_byte": {}, + "rocksdb_block_read_count": {}, + "rocksdb_block_read_time": {}, + "rocksdb_delete_skipped_count": {}, + "rocksdb_key_skipped_count": {}, + "session_alias": {}, + "session_connect_attrs": {}, + "slowest_prewrite_rpc_detail": {}, + "stats": {}, + "storage_from_kv": {}, + "storage_from_mpp": {}, + "succ": {}, + "tidb_cpu_time": {}, + "tikv_cpu_time": {}, + "time": {}, + "time_queued_by_rc": {}, + "total_keys": {}, + "txn_retry": {}, + "txn_start_ts": {}, + "unpacked_bytes_received_tiflash_cross_zone": {}, + "unpacked_bytes_received_tiflash_total": {}, + "unpacked_bytes_received_tikv_cross_zone": {}, + "unpacked_bytes_received_tikv_total": {}, + "unpacked_bytes_sent_tiflash_cross_zone": {}, + "unpacked_bytes_sent_tiflash_total": {}, + "unpacked_bytes_sent_tikv_cross_zone": {}, + "unpacked_bytes_sent_tikv_total": {}, + "user": {}, + "user@host": {}, + "wait_prewrite_binlog_time": {}, + "wait_time": {}, + "wait_ts": {}, + "warnings": {}, + "write_keys": {}, + "write_size": {}, + "write_sql_response_total": {}, +} + +var reservedKeyspaceObservabilitySlowLogFieldPrefixes = []string{ + "cop_backoff_", +} + +var reservedKeyspaceObservabilityStmtLogFields = map[string]struct{}{ + "auth_users": {}, + "backoff_types": {}, + "begin": {}, + "binding_digest": {}, + "binding_sql": {}, + "charset": {}, + "collation": {}, + "commit_count": {}, + "digest": {}, + "end": {}, + "exec_count": {}, + "exec_retry_count": {}, + "exec_retry_time": {}, + "first_seen": {}, + "index_names": {}, + "is_internal": {}, + "keyspace_id": {}, + "keyspace_name": {}, + "last_seen": {}, + "max_backoff_time": {}, + "max_commit_backoff_time": {}, + "max_commit_time": {}, + "max_compile_latency": {}, + "max_cop_process_address": {}, + "max_cop_process_time": {}, + "max_cop_wait_address": {}, + "max_cop_wait_time": {}, + "max_disk": {}, + "max_get_commit_ts_time": {}, + "max_latency": {}, + "max_local_latch_time": {}, + "max_mem": {}, + "max_mem_arbitration": {}, + "max_parse_latency": {}, + "max_prewrite_region_num": {}, + "max_prewrite_time": {}, + "max_process_time": {}, + "max_processed_keys": {}, + "max_resolve_lock_time": {}, + "max_result_rows": {}, + "max_rocksdb_block_cache_hit_count": {}, + "max_rocksdb_block_read_byte": {}, + "max_rocksdb_block_read_count": {}, + "max_rocksdb_delete_skipped_count": {}, + "max_rocksdb_key_skipped_count": {}, + "max_rru": {}, + "max_ru_wait_duration": {}, + "max_ruv2": {}, + "max_total_keys": {}, + "max_txn_retry": {}, + "max_wait_time": {}, + "max_write_keys": {}, + "max_write_size": {}, + "max_wru": {}, + "min_latency": {}, + "min_result_rows": {}, + "normalized_sql": {}, + "plan_cache_hits": {}, + "plan_cache_unqualified_count": {}, + "plan_cache_unqualified_last_reason": {}, + "plan_digest": {}, + "plan_hint": {}, + "plan_in_binding": {}, + "plan_in_cache": {}, + "prepared": {}, + "prev_sql": {}, + "resource_group_name": {}, + "sample_binary_plan": {}, + "sample_plan": {}, + "sample_sql": {}, + "schema_name": {}, + "stmt_type": {}, + "storage_kv": {}, + "storage_mpp": {}, + "sum_affected_rows": {}, + "sum_backoff_time": {}, + "sum_backoff_times": {}, + "sum_backoff_total": {}, + "sum_commit_backoff_time": {}, + "sum_commit_time": {}, + "sum_compile_latency": {}, + "sum_disk": {}, + "sum_errors": {}, + "sum_get_commit_ts_time": {}, + "sum_kv_total": {}, + "sum_latency": {}, + "sum_local_latch_time": {}, + "sum_mem": {}, + "sum_mem_arbitration": {}, + "sum_num_cop_tasks": {}, + "sum_parse_latency": {}, + "sum_pd_total": {}, + "sum_prewrite_region_num": {}, + "sum_prewrite_time": {}, + "sum_process_time": {}, + "sum_processed_keys": {}, + "sum_resolve_lock_time": {}, + "sum_result_rows": {}, + "sum_rocksdb_block_cache_hit_count": {}, + "sum_rocksdb_block_read_byte": {}, + "sum_rocksdb_block_read_count": {}, + "sum_rocksdb_delete_skipped_count": {}, + "sum_rocksdb_key_skipped_count": {}, + "sum_rru": {}, + "sum_ru_wait_duration": {}, + "sum_ruv2": {}, + "sum_tidb_cpu": {}, + "sum_tikv_cpu": {}, + "sum_total_keys": {}, + "sum_txn_retry": {}, + "sum_wait_time": {}, + "sum_warnings": {}, + "sum_write_keys": {}, + "sum_write_size": {}, + "sum_write_sql_resp_total": {}, + "sum_wru": {}, + "table_names": {}, + "unpacked_bytes_received_tiflash_cross_zone": {}, + "unpacked_bytes_received_tiflash_total": {}, + "unpacked_bytes_received_tikv_cross_zone": {}, + "unpacked_bytes_received_tikv_total": {}, + "unpacked_bytes_send_tiflash_cross_zone": {}, + "unpacked_bytes_send_tiflash_total": {}, + "unpacked_bytes_send_tikv_cross_zone": {}, + "unpacked_bytes_send_tikv_total": {}, +} + +// Valid validates metadata observability mappings. +func (o KeyspaceObservability) Valid() error { + metricLabels := make(map[string]struct{}, len(o.Fields)) + slowLogFields := make(map[string]struct{}, len(o.Fields)) + stmtLogFields := make(map[string]struct{}, len(o.Fields)) + for i, field := range o.Fields { + if field.Source == "" { + return fmt.Errorf("[keyspace-observability.fields.%d] source cannot be empty", i) + } + if field.MetricLabel == "" && field.SlowLogField == "" && field.StmtLogField == "" { + return fmt.Errorf("[keyspace-observability.fields.%d] at least one output must be set", i) + } + if field.MetricLabel != "" { + if !validPrometheusLabelName(field.MetricLabel) { + return fmt.Errorf("[keyspace-observability.fields.%d] invalid metric-label %q", i, field.MetricLabel) + } + key := strings.ToLower(field.MetricLabel) + if !strings.HasPrefix(key, keyspaceObservabilityMetricLabelPrefix) { + return fmt.Errorf("[keyspace-observability.fields.%d] metric-label %q must start with %q", i, field.MetricLabel, keyspaceObservabilityMetricLabelPrefix) + } + if _, ok := metricLabels[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated metric-label %q", i, field.MetricLabel) + } + metricLabels[key] = struct{}{} + } + if field.SlowLogField != "" { + if !validKeyspaceObservabilityLogFieldName(field.SlowLogField) { + return fmt.Errorf("[keyspace-observability.fields.%d] invalid slow-log-field %q", i, field.SlowLogField) + } + key := strings.ToLower(field.SlowLogField) + if isReservedKeyspaceObservabilitySlowLogField(key) { + return fmt.Errorf("[keyspace-observability.fields.%d] reserved slow-log-field %q", i, field.SlowLogField) + } + if _, ok := slowLogFields[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated slow-log-field %q", i, field.SlowLogField) + } + slowLogFields[key] = struct{}{} + } + if field.StmtLogField != "" { + key := strings.ToLower(field.StmtLogField) + if _, ok := reservedKeyspaceObservabilityStmtLogFields[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] reserved stmt-log-field %q", i, field.StmtLogField) + } + if _, ok := stmtLogFields[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated stmt-log-field %q", i, field.StmtLogField) + } + stmtLogFields[key] = struct{}{} + } + } + return nil +} + +func isReservedKeyspaceObservabilitySlowLogField(field string) bool { + if _, ok := reservedKeyspaceObservabilitySlowLogFields[field]; ok { + return true + } + for _, prefix := range reservedKeyspaceObservabilitySlowLogFieldPrefixes { + if strings.HasPrefix(field, prefix) { + return true + } + } + return false +} + +func validKeyspaceObservabilityLogFieldName(field string) bool { + return validPrometheusLabelName(field) +} + +func validPrometheusLabelName(label string) bool { + for i, r := range label { + if i == 0 { + if r == '_' || r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' { + continue + } + return false + } + if r == '_' || r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' || r >= '0' && r <= '9' { + continue + } + return false + } + return label != "" +} + +// ResolveKeyspaceObservability resolves configured output values from metadata. +func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { + resolved := KeyspaceObservabilityValues{ + MetricLabels: make(map[string]string), + } + for _, field := range c.KeyspaceObservability.Fields { + value, ok := values[field.Source] + if !ok { + if field.Required { + return fmt.Errorf("missing required keyspace metadata entry %q", field.Source) + } + continue + } + if field.MetricLabel != "" { + resolved.MetricLabels[field.MetricLabel] = value + } + if field.SlowLogField != "" { + resolved.SlowLogFields = append(resolved.SlowLogFields, KeyspaceObservabilityFieldPair{Key: field.SlowLogField, Value: value}) + } + if field.StmtLogField != "" { + resolved.StmtLogFields = append(resolved.StmtLogFields, KeyspaceObservabilityFieldPair{Key: field.StmtLogField, Value: value}) + } + } + c.KeyspaceObservabilityValues = resolved.Clone() + return nil +} + +// Clone returns a deep copy of resolved metadata observability values. +func (v KeyspaceObservabilityValues) Clone() KeyspaceObservabilityValues { + res := KeyspaceObservabilityValues{} + if len(v.MetricLabels) > 0 { + res.MetricLabels = make(map[string]string, len(v.MetricLabels)) + for k, value := range v.MetricLabels { + res.MetricLabels[k] = value + } + } + res.SlowLogFields = append([]KeyspaceObservabilityFieldPair(nil), v.SlowLogFields...) + res.StmtLogFields = append([]KeyspaceObservabilityFieldPair(nil), v.StmtLogFields...) + return res +} + +// GetKeyspaceObservabilityMetricLabels returns resolved metric labels. +func (c *Config) GetKeyspaceObservabilityMetricLabels() map[string]string { + return c.KeyspaceObservabilityValues.Clone().MetricLabels +} + +// GetKeyspaceObservabilitySlowLogFields returns resolved slow log fields. +func (c *Config) GetKeyspaceObservabilitySlowLogFields() []KeyspaceObservabilityFieldPair { + return c.KeyspaceObservabilityValues.Clone().SlowLogFields +} + +// GetKeyspaceObservabilityStmtLogFields returns resolved statement log fields. +func (c *Config) GetKeyspaceObservabilityStmtLogFields() []KeyspaceObservabilityFieldPair { + return c.KeyspaceObservabilityValues.Clone().StmtLogFields +} diff --git a/pkg/keyspace/BUILD.bazel b/pkg/keyspace/BUILD.bazel index 65243270ef1c1..72ad3ccff1a6c 100644 --- a/pkg/keyspace/BUILD.bazel +++ b/pkg/keyspace/BUILD.bazel @@ -11,7 +11,6 @@ go_library( deps = [ "//pkg/config", "//pkg/config/kerneltype", - "@com_github_pingcap_kvproto//pkg/keyspacepb", "@com_github_pingcap_kvproto//pkg/kvrpcpb", "@com_github_tikv_client_go_v2//tikv", "@org_uber_go_zap//:zap", @@ -25,13 +24,10 @@ go_test( srcs = ["keyspace_test.go"], embed = [":keyspace"], flaky = True, - shard_count = 3, + shard_count = 2, deps = [ "//pkg/config", "//pkg/config/kerneltype", - "@com_github_pingcap_kvproto//pkg/keyspacepb", "@com_github_stretchr_testify//require", - "@org_uber_go_zap//:zap", - "@org_uber_go_zap//zaptest/observer", ], ) diff --git a/pkg/keyspace/keyspace.go b/pkg/keyspace/keyspace.go index 1207bd30c6fab..86eba95040998 100644 --- a/pkg/keyspace/keyspace.go +++ b/pkg/keyspace/keyspace.go @@ -18,7 +18,6 @@ import ( "fmt" "sync" - "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/config/kerneltype" @@ -62,7 +61,6 @@ func GetKeyspaceNameBySettings() (keyspaceName string) { var keyspaceNameBytes []byte var genKeyspaceNameOnce sync.Once -var keyspaceMeta sync.Map // GetKeyspaceNameBytesBySettings is used to get keyspace name setting as a byte slice. func GetKeyspaceNameBytesBySettings() []byte { @@ -82,33 +80,12 @@ func IsKeyspaceNameEmpty(keyspaceName string) bool { return keyspaceName == "" } -// SetKeyspaceMeta stores keyspace metadata loaded at startup for later reuse. -func SetKeyspaceMeta(meta *keyspacepb.KeyspaceMeta) { - if meta == nil || IsKeyspaceNameEmpty(meta.GetName()) { - return - } - keyspaceMeta.Store(meta.GetName(), meta) -} - -// GetKeyspaceMeta returns keyspace metadata loaded at startup. -func GetKeyspaceMeta(keyspaceName string) (*keyspacepb.KeyspaceMeta, bool) { - meta, ok := keyspaceMeta.Load(keyspaceName) - if !ok { - return nil, false - } - return meta.(*keyspacepb.KeyspaceMeta), true -} - // WrapZapcoreWithKeyspace is used to wrap zapcore.Core. func WrapZapcoreWithKeyspace() zap.Option { return zap.WrapCore(func(core zapcore.Core) zapcore.Core { keyspaceName := GetKeyspaceNameBySettings() if !IsKeyspaceNameEmpty(keyspaceName) { - fields := []zap.Field{zap.String("keyspaceName", keyspaceName)} - if meta, ok := GetKeyspaceMeta(keyspaceName); ok { - fields = append(fields, zap.Uint32("keyspaceID", meta.GetId())) - } - core = core.With(fields) + core = core.With([]zap.Field{zap.String("keyspaceName", keyspaceName)}) } return core }) diff --git a/pkg/keyspace/keyspace_test.go b/pkg/keyspace/keyspace_test.go index b6a205b6cd45d..50e64bd4b032d 100644 --- a/pkg/keyspace/keyspace_test.go +++ b/pkg/keyspace/keyspace_test.go @@ -18,12 +18,9 @@ import ( "sync" "testing" - "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/config/kerneltype" "github.com/stretchr/testify/require" - "go.uber.org/zap" - "go.uber.org/zap/zaptest/observer" ) func TestSetKeyspaceNameInConf(t *testing.T) { @@ -74,41 +71,6 @@ func TestNoKeyspaceNameSet(t *testing.T) { } } -func TestSetKeyspaceMeta(t *testing.T) { - restore := config.RestoreFunc() - defer restore() - t.Cleanup(func() { - keyspaceMeta.Delete("test_keyspace") - }) - - config.UpdateGlobal(func(conf *config.Config) { - conf.KeyspaceName = "" - }) - - meta := &keyspacepb.KeyspaceMeta{Id: 42, Name: "test_keyspace"} - SetKeyspaceMeta(meta) - - got, ok := GetKeyspaceMeta("test_keyspace") - require.True(t, ok) - require.Equal(t, uint32(42), got.GetId()) - require.Equal(t, "test_keyspace", got.GetName()) - - SetKeyspaceMeta(nil) - _, ok = GetKeyspaceMeta("") - require.False(t, ok) - - config.UpdateGlobal(func(conf *config.Config) { - conf.KeyspaceName = "test_keyspace" - }) - core, logs := observer.New(zap.InfoLevel) - logger := zap.New(core, WrapZapcoreWithKeyspace()) - logger.Info("test") - entries := logs.All() - require.Len(t, entries, 1) - require.Equal(t, "test_keyspace", entries[0].ContextMap()["keyspaceName"]) - require.Equal(t, uint32(42), entries[0].ContextMap()["keyspaceID"]) -} - func BenchmarkGetKeyspaceNameBytesBySettings(b *testing.B) { if !kerneltype.IsNextGen() { b.Skip("NextGen is not enabled, skipping benchmark") diff --git a/pkg/standby/BUILD.bazel b/pkg/standby/BUILD.bazel index dc917bccb6cb6..01a5240b86a86 100644 --- a/pkg/standby/BUILD.bazel +++ b/pkg/standby/BUILD.bazel @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "standby", @@ -18,3 +18,12 @@ go_library( "@org_uber_go_zap//:zap", ], ) + +go_test( + name = "standby_test", + timeout = "short", + srcs = ["standby_test.go"], + embed = [":standby"], + flaky = True, + deps = ["@com_github_stretchr_testify//require"], +) diff --git a/pkg/standby/standby.go b/pkg/standby/standby.go index 267c80fa0b947..5a6c804423f12 100644 --- a/pkg/standby/standby.go +++ b/pkg/standby/standby.go @@ -48,8 +48,9 @@ const ( // ActivateRequest is the request body for activating the tidb server. type ActivateRequest struct { - KeyspaceName string `json:"keyspace_name"` - MaxIdleSeconds uint `json:"max_idle_seconds"` + KeyspaceName string `json:"keyspace_name"` + MaxIdleSeconds uint `json:"max_idle_seconds"` + Metadata map[string]string `json:"metadata,omitempty"` // analyze table RunAutoAnalyze bool `json:"run_auto_analyze"` @@ -166,6 +167,20 @@ func IsPreTidbNormalRestart(keyspaceName string) (bool, string) { return true, preTidbNormalRestartMsg } +// ActivationMetadata returns a copy of metadata carried by the activate request. +func (c *LoadKeyspaceController) ActivationMetadata() map[string]string { + mu.RLock() + defer mu.RUnlock() + if len(activateRequest.Metadata) == 0 { + return nil + } + metadata := make(map[string]string, len(activateRequest.Metadata)) + for k, v := range activateRequest.Metadata { + metadata[k] = v + } + return metadata +} + // Handler returns a handler to query tidb pool status or activate or exit the tidb server. func (c *LoadKeyspaceController) Handler(svr *server.Server) (string, *http.ServeMux) { mux := http.NewServeMux() @@ -349,7 +364,12 @@ func (c *LoadKeyspaceController) WaitForActivate() { <-activateCh - logutil.BgLogger().Info("standby receive activate request", zap.Any("activate-request", activateRequest)) + logutil.BgLogger().Info("standby receive activate request", + zap.String("keyspace-name", activateRequest.KeyspaceName), + zap.Uint("max-idle-seconds", activateRequest.MaxIdleSeconds), + zap.Bool("run-auto-analyze", activateRequest.RunAutoAnalyze), + zap.Bool("tidb-enable-ddl", activateRequest.TiDBEnableDDL), + zap.Int("metadata-count", len(activateRequest.Metadata))) config.UpdateGlobal(func(c *config.Config) { c.KeyspaceName = activateRequest.KeyspaceName diff --git a/pkg/standby/standby_test.go b/pkg/standby/standby_test.go new file mode 100644 index 0000000000000..a495006764b4d --- /dev/null +++ b/pkg/standby/standby_test.go @@ -0,0 +1,53 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package standby + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestActivateRequestMetadata(t *testing.T) { + var req ActivateRequest + require.NoError(t, json.Unmarshal([]byte(`{ + "keyspace_name": "ks", + "metadata": { + "keyspace_id": "42", + "meta_a": "value_a" + } + }`), &req)) + require.Equal(t, map[string]string{ + "keyspace_id": "42", + "meta_a": "value_a", + }, req.Metadata) + + mu.Lock() + originalRequest := activateRequest + activateRequest = req + mu.Unlock() + t.Cleanup(func() { + mu.Lock() + activateRequest = originalRequest + mu.Unlock() + }) + + controller := NewLoadKeyspaceController() + metadata := controller.ActivationMetadata() + require.Equal(t, req.Metadata, metadata) + metadata["meta_a"] = "changed" + require.Equal(t, "value_a", controller.ActivationMetadata()["meta_a"]) +} diff --git a/pkg/util/metricsutil/common.go b/pkg/util/metricsutil/common.go index 76c0cb2f0f091..4a12f2dcacffb 100644 --- a/pkg/util/metricsutil/common.go +++ b/pkg/util/metricsutil/common.go @@ -55,7 +55,7 @@ const defaultKeyspaceLabel = "keyspace_id" // RegisterMetrics registers metrics with keyspace metadata labels when available. func RegisterMetrics() error { cfg := config.GetGlobalConfig() - if !keyspace.IsKeyspaceNameEmpty(cfg.KeyspaceName) && kerneltype.IsNextGen() { + if kerneltype.IsNextGen() { metricscommon.SetConstLabels("keyspace_name", cfg.KeyspaceName) } return registerMetrics() From 295ec06a6f2b9f890b8b59984275f747bdcf817d Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 25 May 2026 18:43:14 +0800 Subject: [PATCH 10/18] standby: pass keyspace ID in activate request --- cmd/tidb-server/main.go | 14 ++++++++------ cmd/tidb-server/main_test.go | 14 +++++++------- pkg/standby/standby.go | 13 +++++++++++++ pkg/standby/standby_test.go | 13 ++++++++++--- 4 files changed, 38 insertions(+), 16 deletions(-) diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 0a2cfddd9d54b..90f1c80b10067 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -315,6 +315,7 @@ func main() { } var standbyController server.StandbyController + var activationKeyspaceID *uint32 var activationMetadata map[string]string if config.GetGlobalConfig().Standby.StandByMode { standbyController = standby.NewLoadKeyspaceController() @@ -333,6 +334,7 @@ func main() { // need to validate config again in case of config change via standby terror.MustNil(config.GetGlobalConfig().Valid()) if c, ok := standbyController.(*standby.LoadKeyspaceController); ok { + activationKeyspaceID = c.ActivationKeyspaceID() activationMetadata = c.ActivationMetadata() } } @@ -340,7 +342,7 @@ func main() { signal.SetupUSR1Handler() err = registerStores() terror.MustNil(err) - err = prepareKeyspaceObservability(activationMetadata) + err = prepareKeyspaceObservability(activationKeyspaceID, activationMetadata) terror.MustNil(err) err = metricsutil.RegisterMetrics() terror.MustNil(err) @@ -1158,23 +1160,23 @@ const ( keyspaceNameMetricLabel = "keyspace_name" ) -func prepareKeyspaceObservability(metadata map[string]string) error { +func prepareKeyspaceObservability(keyspaceID *uint32, metadata map[string]string) error { cfg := config.GetGlobalConfig() if !kerneltype.IsNextGen() || cfg.Store != config.StoreTypeTiKV { return nil } metricscommon.SetConstLabels(keyspaceNameMetricLabel, cfg.KeyspaceName) - return prepareKeyspaceObservabilityWithMetadata(metadata, cfg.KeyspaceName, deploymode.IsStarter()) + return prepareKeyspaceObservabilityWithMetadata(keyspaceID, metadata, cfg.KeyspaceName, deploymode.IsStarter()) } -func prepareKeyspaceObservabilityWithMetadata(metadata map[string]string, keyspaceName string, includeConfiguredFields bool) error { +func prepareKeyspaceObservabilityWithMetadata(keyspaceID *uint32, metadata map[string]string, keyspaceName string, includeConfiguredFields bool) error { resolvedValues := config.KeyspaceObservabilityValues{ MetricLabels: map[string]string{ keyspaceNameMetricLabel: keyspaceName, }, } - if keyspaceID, ok := metadata[keyspaceIDMetricLabel]; ok { - resolvedValues.MetricLabels[keyspaceIDMetricLabel] = keyspaceID + if keyspaceID != nil { + resolvedValues.MetricLabels[keyspaceIDMetricLabel] = fmt.Sprint(*keyspaceID) } if includeConfiguredFields { copiedConfig := *config.GetGlobalConfig() diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 90bdb6ce0e144..0f957626202e2 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -170,9 +170,9 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { } }) - err := prepareKeyspaceObservabilityWithMetadata(map[string]string{ - keyspaceIDMetricLabel: "42", - "meta_a": "value_a", + keyspaceID := uint32(42) + err := prepareKeyspaceObservabilityWithMetadata(&keyspaceID, map[string]string{ + "meta_a": "value_a", }, "ks", true) require.NoError(t, err) @@ -186,9 +186,9 @@ func TestSetupKeyspaceObservabilityForNonStarter(t *testing.T) { restore := config.RestoreFunc() defer restore() - err := prepareKeyspaceObservabilityWithMetadata(map[string]string{ - keyspaceIDMetricLabel: "42", - "meta_a": "value_a", + keyspaceID := uint32(42) + err := prepareKeyspaceObservabilityWithMetadata(&keyspaceID, map[string]string{ + "meta_a": "value_a", }, "ks", false) require.NoError(t, err) @@ -211,6 +211,6 @@ func TestSetupKeyspaceObservabilityForStartSkipsClassic(t *testing.T) { conf.KeyspaceName = "test_keyspace" }) - require.NoError(t, prepareKeyspaceObservability(nil)) + require.NoError(t, prepareKeyspaceObservability(nil, nil)) require.Empty(t, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) } diff --git a/pkg/standby/standby.go b/pkg/standby/standby.go index 5a6c804423f12..1d7e9334d41ef 100644 --- a/pkg/standby/standby.go +++ b/pkg/standby/standby.go @@ -49,6 +49,7 @@ const ( // ActivateRequest is the request body for activating the tidb server. type ActivateRequest struct { KeyspaceName string `json:"keyspace_name"` + KeyspaceID *uint32 `json:"keyspace_id,omitempty"` MaxIdleSeconds uint `json:"max_idle_seconds"` Metadata map[string]string `json:"metadata,omitempty"` @@ -181,6 +182,17 @@ func (c *LoadKeyspaceController) ActivationMetadata() map[string]string { return metadata } +// ActivationKeyspaceID returns the keyspace ID carried by the activate request. +func (c *LoadKeyspaceController) ActivationKeyspaceID() *uint32 { + mu.RLock() + defer mu.RUnlock() + if activateRequest.KeyspaceID == nil { + return nil + } + keyspaceID := *activateRequest.KeyspaceID + return &keyspaceID +} + // Handler returns a handler to query tidb pool status or activate or exit the tidb server. func (c *LoadKeyspaceController) Handler(svr *server.Server) (string, *http.ServeMux) { mux := http.NewServeMux() @@ -366,6 +378,7 @@ func (c *LoadKeyspaceController) WaitForActivate() { logutil.BgLogger().Info("standby receive activate request", zap.String("keyspace-name", activateRequest.KeyspaceName), + zap.Uint32p("keyspace-id", activateRequest.KeyspaceID), zap.Uint("max-idle-seconds", activateRequest.MaxIdleSeconds), zap.Bool("run-auto-analyze", activateRequest.RunAutoAnalyze), zap.Bool("tidb-enable-ddl", activateRequest.TiDBEnableDDL), diff --git a/pkg/standby/standby_test.go b/pkg/standby/standby_test.go index a495006764b4d..bf2f3aca032d4 100644 --- a/pkg/standby/standby_test.go +++ b/pkg/standby/standby_test.go @@ -25,14 +25,15 @@ func TestActivateRequestMetadata(t *testing.T) { var req ActivateRequest require.NoError(t, json.Unmarshal([]byte(`{ "keyspace_name": "ks", + "keyspace_id": 42, "metadata": { - "keyspace_id": "42", "meta_a": "value_a" } }`), &req)) + require.NotNil(t, req.KeyspaceID) + require.Equal(t, uint32(42), *req.KeyspaceID) require.Equal(t, map[string]string{ - "keyspace_id": "42", - "meta_a": "value_a", + "meta_a": "value_a", }, req.Metadata) mu.Lock() @@ -50,4 +51,10 @@ func TestActivateRequestMetadata(t *testing.T) { require.Equal(t, req.Metadata, metadata) metadata["meta_a"] = "changed" require.Equal(t, "value_a", controller.ActivationMetadata()["meta_a"]) + + keyspaceID := controller.ActivationKeyspaceID() + require.NotNil(t, keyspaceID) + require.Equal(t, uint32(42), *keyspaceID) + *keyspaceID = 43 + require.Equal(t, uint32(42), *controller.ActivationKeyspaceID()) } From 7ac53e559ca7458d2415e22c2012a167353e4565 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 26 May 2026 10:32:31 +0800 Subject: [PATCH 11/18] standby: require keyspace ID in activate request --- cmd/tidb-server/BUILD.bazel | 2 ++ cmd/tidb-server/main.go | 11 ++++--- cmd/tidb-server/main_test.go | 9 +++--- pkg/standby/BUILD.bazel | 1 + pkg/standby/standby.go | 12 +++---- pkg/standby/standby_test.go | 63 +++++++++++++++++++++++++++++++++--- 6 files changed, 76 insertions(+), 22 deletions(-) diff --git a/cmd/tidb-server/BUILD.bazel b/cmd/tidb-server/BUILD.bazel index a52041c6a4a35..fa4001dec9cc0 100644 --- a/cmd/tidb-server/BUILD.bazel +++ b/cmd/tidb-server/BUILD.bazel @@ -73,6 +73,7 @@ go_library( "@com_github_prometheus_client_golang//prometheus/push", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_client_go_v2//txnkv/transaction", + "@com_github_tikv_pd_client//constants", "@org_uber_go_automaxprocs//maxprocs", "@org_uber_go_zap//:zap", ], @@ -118,6 +119,7 @@ go_test( "//pkg/sessionctx/variable", "//pkg/testkit/testsetup", "@com_github_stretchr_testify//require", + "@com_github_tikv_pd_client//constants", "@io_opencensus_go//stats/view", "@org_uber_go_goleak//:goleak", ], diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 90f1c80b10067..5dd4309436d63 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -93,6 +93,7 @@ import ( "github.com/prometheus/client_golang/prometheus/push" "github.com/tikv/client-go/v2/tikv" "github.com/tikv/client-go/v2/txnkv/transaction" + "github.com/tikv/pd/client/constants" "go.uber.org/automaxprocs/maxprocs" "go.uber.org/zap" ) @@ -315,7 +316,7 @@ func main() { } var standbyController server.StandbyController - var activationKeyspaceID *uint32 + activationKeyspaceID := constants.NullKeyspaceID var activationMetadata map[string]string if config.GetGlobalConfig().Standby.StandByMode { standbyController = standby.NewLoadKeyspaceController() @@ -1160,7 +1161,7 @@ const ( keyspaceNameMetricLabel = "keyspace_name" ) -func prepareKeyspaceObservability(keyspaceID *uint32, metadata map[string]string) error { +func prepareKeyspaceObservability(keyspaceID uint32, metadata map[string]string) error { cfg := config.GetGlobalConfig() if !kerneltype.IsNextGen() || cfg.Store != config.StoreTypeTiKV { return nil @@ -1169,14 +1170,14 @@ func prepareKeyspaceObservability(keyspaceID *uint32, metadata map[string]string return prepareKeyspaceObservabilityWithMetadata(keyspaceID, metadata, cfg.KeyspaceName, deploymode.IsStarter()) } -func prepareKeyspaceObservabilityWithMetadata(keyspaceID *uint32, metadata map[string]string, keyspaceName string, includeConfiguredFields bool) error { +func prepareKeyspaceObservabilityWithMetadata(keyspaceID uint32, metadata map[string]string, keyspaceName string, includeConfiguredFields bool) error { resolvedValues := config.KeyspaceObservabilityValues{ MetricLabels: map[string]string{ keyspaceNameMetricLabel: keyspaceName, }, } - if keyspaceID != nil { - resolvedValues.MetricLabels[keyspaceIDMetricLabel] = fmt.Sprint(*keyspaceID) + if keyspaceID != constants.NullKeyspaceID { + resolvedValues.MetricLabels[keyspaceIDMetricLabel] = fmt.Sprint(keyspaceID) } if includeConfiguredFields { copiedConfig := *config.GetGlobalConfig() diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 0f957626202e2..3cb8f505e5755 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/testkit/testsetup" "github.com/stretchr/testify/require" + "github.com/tikv/pd/client/constants" "go.opencensus.io/stats/view" "go.uber.org/goleak" ) @@ -170,8 +171,7 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { } }) - keyspaceID := uint32(42) - err := prepareKeyspaceObservabilityWithMetadata(&keyspaceID, map[string]string{ + err := prepareKeyspaceObservabilityWithMetadata(42, map[string]string{ "meta_a": "value_a", }, "ks", true) require.NoError(t, err) @@ -186,8 +186,7 @@ func TestSetupKeyspaceObservabilityForNonStarter(t *testing.T) { restore := config.RestoreFunc() defer restore() - keyspaceID := uint32(42) - err := prepareKeyspaceObservabilityWithMetadata(&keyspaceID, map[string]string{ + err := prepareKeyspaceObservabilityWithMetadata(42, map[string]string{ "meta_a": "value_a", }, "ks", false) require.NoError(t, err) @@ -211,6 +210,6 @@ func TestSetupKeyspaceObservabilityForStartSkipsClassic(t *testing.T) { conf.KeyspaceName = "test_keyspace" }) - require.NoError(t, prepareKeyspaceObservability(nil, nil)) + require.NoError(t, prepareKeyspaceObservability(constants.NullKeyspaceID, nil)) require.Empty(t, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) } diff --git a/pkg/standby/BUILD.bazel b/pkg/standby/BUILD.bazel index 01a5240b86a86..897be22874a26 100644 --- a/pkg/standby/BUILD.bazel +++ b/pkg/standby/BUILD.bazel @@ -25,5 +25,6 @@ go_test( srcs = ["standby_test.go"], embed = [":standby"], flaky = True, + shard_count = 3, deps = ["@com_github_stretchr_testify//require"], ) diff --git a/pkg/standby/standby.go b/pkg/standby/standby.go index 1d7e9334d41ef..2cbedce2ee3ab 100644 --- a/pkg/standby/standby.go +++ b/pkg/standby/standby.go @@ -183,14 +183,10 @@ func (c *LoadKeyspaceController) ActivationMetadata() map[string]string { } // ActivationKeyspaceID returns the keyspace ID carried by the activate request. -func (c *LoadKeyspaceController) ActivationKeyspaceID() *uint32 { +func (c *LoadKeyspaceController) ActivationKeyspaceID() uint32 { mu.RLock() defer mu.RUnlock() - if activateRequest.KeyspaceID == nil { - return nil - } - keyspaceID := *activateRequest.KeyspaceID - return &keyspaceID + return *activateRequest.KeyspaceID } // Handler returns a handler to query tidb pool status or activate or exit the tidb server. @@ -203,7 +199,7 @@ func (c *LoadKeyspaceController) Handler(svr *server.Server) (string, *http.Serv w.WriteHeader(http.StatusBadRequest) return } - if req.KeyspaceName == "" { + if req.KeyspaceName == "" || req.KeyspaceID == nil { w.WriteHeader(http.StatusBadRequest) return } @@ -222,7 +218,7 @@ func (c *LoadKeyspaceController) Handler(svr *server.Server) (string, *http.Serv logutil.BgLogger().Error("failed to write response", zap.Error(err)) } return - case activateRequest.KeyspaceName != req.KeyspaceName: + case activateRequest.KeyspaceName != req.KeyspaceName || *activateRequest.KeyspaceID != *req.KeyspaceID: mu.Unlock() w.WriteHeader(http.StatusPreconditionFailed) _, err := w.Write([]byte("server is not in standby mode")) diff --git a/pkg/standby/standby_test.go b/pkg/standby/standby_test.go index bf2f3aca032d4..783c6ebfa4ddb 100644 --- a/pkg/standby/standby_test.go +++ b/pkg/standby/standby_test.go @@ -16,6 +16,10 @@ package standby import ( "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "strings" "testing" "github.com/stretchr/testify/require" @@ -36,6 +40,11 @@ func TestActivateRequestMetadata(t *testing.T) { "meta_a": "value_a", }, req.Metadata) + var zeroKeyspaceIDReq ActivateRequest + require.NoError(t, json.Unmarshal([]byte(`{"keyspace_name":"ks","keyspace_id":0}`), &zeroKeyspaceIDReq)) + require.NotNil(t, zeroKeyspaceIDReq.KeyspaceID) + require.Equal(t, uint32(0), *zeroKeyspaceIDReq.KeyspaceID) + mu.Lock() originalRequest := activateRequest activateRequest = req @@ -53,8 +62,54 @@ func TestActivateRequestMetadata(t *testing.T) { require.Equal(t, "value_a", controller.ActivationMetadata()["meta_a"]) keyspaceID := controller.ActivationKeyspaceID() - require.NotNil(t, keyspaceID) - require.Equal(t, uint32(42), *keyspaceID) - *keyspaceID = 43 - require.Equal(t, uint32(42), *controller.ActivationKeyspaceID()) + require.Equal(t, uint32(42), keyspaceID) + keyspaceID = 43 + require.Equal(t, uint32(42), controller.ActivationKeyspaceID()) +} + +func TestActivateRequiresKeyspaceID(t *testing.T) { + controller := NewLoadKeyspaceController() + _, mux := controller.Handler(nil) + req := httptest.NewRequest(http.MethodPost, "/tidb-pool/activate", strings.NewReader(`{"keyspace_name":"ks"}`)) + resp := httptest.NewRecorder() + + mux.ServeHTTP(resp, req) + + require.Equal(t, http.StatusBadRequest, resp.Code) +} + +func TestActivateRejectsMismatchedKeyspaceID(t *testing.T) { + mu.Lock() + originalState, originalRequest := state, activateRequest + state = standbyState + activateRequest = ActivateRequest{} + mu.Unlock() + t.Cleanup(func() { + mu.Lock() + state = originalState + activateRequest = originalRequest + mu.Unlock() + }) + + controller := NewLoadKeyspaceController() + _, mux := controller.Handler(nil) + firstRespCode := make(chan int, 1) + go func() { + req := httptest.NewRequest(http.MethodPost, "/tidb-pool/activate", strings.NewReader(`{"keyspace_name":"ks","keyspace_id":42}`)) + resp := httptest.NewRecorder() + mux.ServeHTTP(resp, req) + firstRespCode <- resp.Code + }() + t.Cleanup(func() { + controller.EndStandby(errors.New("test done")) + require.Equal(t, http.StatusInternalServerError, <-firstRespCode) + }) + + <-activateCh + req := httptest.NewRequest(http.MethodPost, "/tidb-pool/activate", strings.NewReader(`{"keyspace_name":"ks","keyspace_id":43}`)) + resp := httptest.NewRecorder() + + mux.ServeHTTP(resp, req) + + require.Equal(t, http.StatusPreconditionFailed, resp.Code) } From cfdc3e265658a23c49e9690fa5f5eef835e82bbd Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 26 May 2026 13:50:31 +0800 Subject: [PATCH 12/18] *: address keyspace observability comments --- cmd/tidb-server/BUILD.bazel | 5 +--- cmd/tidb-server/main.go | 45 ++++++++++++++-------------- cmd/tidb-server/main_test.go | 35 +++++++--------------- pkg/config/BUILD.bazel | 1 + pkg/config/config.toml.example | 9 ------ pkg/config/keyspace_observability.go | 24 +++++---------- pkg/metrics/common/wrapper.go | 8 ----- pkg/standby/standby.go | 9 +++--- pkg/util/metricsutil/common.go | 16 +++++++--- pkg/util/stmtsummary/v2/logger.go | 16 ++++++++-- 10 files changed, 72 insertions(+), 96 deletions(-) diff --git a/cmd/tidb-server/BUILD.bazel b/cmd/tidb-server/BUILD.bazel index fa4001dec9cc0..b4e78ac7e11c5 100644 --- a/cmd/tidb-server/BUILD.bazel +++ b/cmd/tidb-server/BUILD.bazel @@ -19,7 +19,6 @@ go_library( "//pkg/keyspace", "//pkg/kv", "//pkg/metrics", - "//pkg/metrics/common", "//pkg/parser/mysql", "//pkg/parser/terror", "//pkg/parser/types", @@ -73,7 +72,6 @@ go_library( "@com_github_prometheus_client_golang//prometheus/push", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_client_go_v2//txnkv/transaction", - "@com_github_tikv_pd_client//constants", "@org_uber_go_automaxprocs//maxprocs", "@org_uber_go_zap//:zap", ], @@ -109,7 +107,7 @@ go_test( srcs = ["main_test.go"], embed = [":tidb-server_lib"], flaky = True, - shard_count = 9, + shard_count = 8, deps = [ "//pkg/config", "//pkg/config/deploymode", @@ -119,7 +117,6 @@ go_test( "//pkg/sessionctx/variable", "//pkg/testkit/testsetup", "@com_github_stretchr_testify//require", - "@com_github_tikv_pd_client//constants", "@io_opencensus_go//stats/view", "@org_uber_go_goleak//:goleak", ], diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 5dd4309436d63..3b80f40d59645 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -44,7 +44,6 @@ import ( "github.com/pingcap/tidb/pkg/keyspace" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/metrics" - metricscommon "github.com/pingcap/tidb/pkg/metrics/common" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/parser/terror" parsertypes "github.com/pingcap/tidb/pkg/parser/types" @@ -93,7 +92,6 @@ import ( "github.com/prometheus/client_golang/prometheus/push" "github.com/tikv/client-go/v2/tikv" "github.com/tikv/client-go/v2/txnkv/transaction" - "github.com/tikv/pd/client/constants" "go.uber.org/automaxprocs/maxprocs" "go.uber.org/zap" ) @@ -316,8 +314,9 @@ func main() { } var standbyController server.StandbyController - activationKeyspaceID := constants.NullKeyspaceID + var activationKeyspaceID uint32 var activationMetadata map[string]string + hasActivationRequest := false if config.GetGlobalConfig().Standby.StandByMode { standbyController = standby.NewLoadKeyspaceController() } @@ -337,14 +336,17 @@ func main() { if c, ok := standbyController.(*standby.LoadKeyspaceController); ok { activationKeyspaceID = c.ActivationKeyspaceID() activationMetadata = c.ActivationMetadata() + hasActivationRequest = true } } signal.SetupUSR1Handler() err = registerStores() terror.MustNil(err) - err = prepareKeyspaceObservability(activationKeyspaceID, activationMetadata) - terror.MustNil(err) + if deploymode.IsStarter() && hasActivationRequest { + err = prepareKeyspaceObservabilityForStarter(activationKeyspaceID, activationMetadata) + terror.MustNil(err) + } err = metricsutil.RegisterMetrics() terror.MustNil(err) @@ -1161,36 +1163,33 @@ const ( keyspaceNameMetricLabel = "keyspace_name" ) -func prepareKeyspaceObservability(keyspaceID uint32, metadata map[string]string) error { +func prepareKeyspaceObservabilityForStarter(keyspaceID uint32, metadata map[string]string) error { cfg := config.GetGlobalConfig() - if !kerneltype.IsNextGen() || cfg.Store != config.StoreTypeTiKV { + if cfg.Store != config.StoreTypeTiKV { return nil } - metricscommon.SetConstLabels(keyspaceNameMetricLabel, cfg.KeyspaceName) - return prepareKeyspaceObservabilityWithMetadata(keyspaceID, metadata, cfg.KeyspaceName, deploymode.IsStarter()) + return prepareKeyspaceObservabilityWithMetadata(keyspaceID, metadata, cfg.KeyspaceName) } -func prepareKeyspaceObservabilityWithMetadata(keyspaceID uint32, metadata map[string]string, keyspaceName string, includeConfiguredFields bool) error { +func prepareKeyspaceObservabilityWithMetadata(keyspaceID uint32, metadata map[string]string, keyspaceName string) error { resolvedValues := config.KeyspaceObservabilityValues{ MetricLabels: map[string]string{ keyspaceNameMetricLabel: keyspaceName, }, } - if keyspaceID != constants.NullKeyspaceID { - resolvedValues.MetricLabels[keyspaceIDMetricLabel] = fmt.Sprint(keyspaceID) + resolvedValues.MetricLabels[keyspaceIDMetricLabel] = fmt.Sprint(keyspaceID) + + copiedConfig := *config.GetGlobalConfig() + if err := copiedConfig.ResolveKeyspaceObservability(metadata); err != nil { + return err } - if includeConfiguredFields { - copiedConfig := *config.GetGlobalConfig() - if err := copiedConfig.ResolveKeyspaceObservability(metadata); err != nil { - return err - } - configuredValues := copiedConfig.KeyspaceObservabilityValues.Clone() - for k, v := range configuredValues.MetricLabels { - resolvedValues.MetricLabels[k] = v - } - resolvedValues.SlowLogFields = configuredValues.SlowLogFields - resolvedValues.StmtLogFields = configuredValues.StmtLogFields + configuredValues := copiedConfig.KeyspaceObservabilityValues.Clone() + for k, v := range configuredValues.MetricLabels { + resolvedValues.MetricLabels[k] = v } + resolvedValues.SlowLogFields = configuredValues.SlowLogFields + resolvedValues.StmtLogFields = configuredValues.StmtLogFields + config.UpdateGlobal(func(conf *config.Config) { conf.KeyspaceObservabilityValues = resolvedValues }) diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 3cb8f505e5755..5e55299b191e2 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -26,7 +26,6 @@ import ( "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/testkit/testsetup" "github.com/stretchr/testify/require" - "github.com/tikv/pd/client/constants" "go.opencensus.io/stats/view" "go.uber.org/goleak" ) @@ -159,6 +158,11 @@ func TestSetVersionByConfigNormalizeLegacyPlaceholderForNextGen(t *testing.T) { func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { restore := config.RestoreFunc() defer restore() + + err := prepareKeyspaceObservabilityWithMetadata(42, nil, "ks") + require.NoError(t, err) + require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks"}, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) + config.UpdateGlobal(func(conf *config.Config) { conf.KeyspaceObservability = config.KeyspaceObservability{ Fields: []config.KeyspaceObservabilityField{{ @@ -171,9 +175,9 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { } }) - err := prepareKeyspaceObservabilityWithMetadata(42, map[string]string{ + err = prepareKeyspaceObservabilityWithMetadata(42, map[string]string{ "meta_a": "value_a", - }, "ks", true) + }, "ks") require.NoError(t, err) cfg := config.GetGlobalConfig() @@ -182,34 +186,15 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { require.Equal(t, []config.KeyspaceObservabilityFieldPair{{Key: "stmt_meta_a", Value: "value_a"}}, cfg.GetKeyspaceObservabilityStmtLogFields()) } -func TestSetupKeyspaceObservabilityForNonStarter(t *testing.T) { - restore := config.RestoreFunc() - defer restore() - - err := prepareKeyspaceObservabilityWithMetadata(42, map[string]string{ - "meta_a": "value_a", - }, "ks", false) - require.NoError(t, err) - - cfg := config.GetGlobalConfig() - require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks"}, cfg.GetKeyspaceObservabilityMetricLabels()) - require.Empty(t, cfg.GetKeyspaceObservabilitySlowLogFields()) - require.Empty(t, cfg.GetKeyspaceObservabilityStmtLogFields()) -} - -func TestSetupKeyspaceObservabilityForStartSkipsClassic(t *testing.T) { - if !kerneltype.IsClassic() { - t.Skip("only verifies the classic-mode short-circuit path") - } - +func TestSetupKeyspaceObservabilityForStarterSkipsNonTiKV(t *testing.T) { restore := config.RestoreFunc() defer restore() config.UpdateGlobal(func(conf *config.Config) { - conf.Store = config.StoreTypeTiKV + conf.Store = config.StoreTypeUniStore conf.Path = "invalid-pd-path" conf.KeyspaceName = "test_keyspace" }) - require.NoError(t, prepareKeyspaceObservability(constants.NullKeyspaceID, nil)) + require.NoError(t, prepareKeyspaceObservabilityForStarter(42, nil)) require.Empty(t, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) } diff --git a/pkg/config/BUILD.bazel b/pkg/config/BUILD.bazel index 0f185041d8813..3787f75caebfb 100644 --- a/pkg/config/BUILD.bazel +++ b/pkg/config/BUILD.bazel @@ -24,6 +24,7 @@ go_library( "@com_github_burntsushi_toml//:toml", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_log//:log", + "@com_github_prometheus_common//model", "@com_github_tikv_client_go_v2//config", "@com_github_uber_jaeger_client_go//config", "@org_uber_go_atomic//:atomic", diff --git a/pkg/config/config.toml.example b/pkg/config/config.toml.example index 64b0d551ddde3..70d5f1ad6916d 100644 --- a/pkg/config/config.toml.example +++ b/pkg/config/config.toml.example @@ -480,15 +480,6 @@ tikv-raftstore-store-write-trigger-wb-bytes = 0.00006100 tikv-storage-processed-keys-batch-get = 0.00266791 tikv-storage-processed-keys-get = 0.01416829 -# Map selected keyspace metadata entries to observability outputs. -# Only valid when deploy-mode is starter. -# [[keyspace-observability.fields]] -# source = "meta_key" -# metric-label = "metric_label" -# slow-log-field = "Slow_log_field" -# stmt-log-field = "stmt_log_field" -# required = false - # instance scope variables # These options are also available as a system variable for online configuration # changes to the system variable do not persist to the cluster. You must make changes diff --git a/pkg/config/keyspace_observability.go b/pkg/config/keyspace_observability.go index ba6da947de5c0..e2c32cd2b647f 100644 --- a/pkg/config/keyspace_observability.go +++ b/pkg/config/keyspace_observability.go @@ -17,20 +17,22 @@ package config import ( "fmt" "strings" + + "github.com/prometheus/common/model" ) // KeyspaceObservability maps metadata entries to observability outputs. type KeyspaceObservability struct { - Fields []KeyspaceObservabilityField `toml:"fields" json:"fields"` + Fields []KeyspaceObservabilityField `toml:"fields" json:"fields,omitempty"` } // KeyspaceObservabilityField describes one metadata entry mapping. type KeyspaceObservabilityField struct { - Source string `toml:"source" json:"source"` + Source string `toml:"source" json:"source,omitempty"` MetricLabel string `toml:"metric-label" json:"metric-label,omitempty"` SlowLogField string `toml:"slow-log-field" json:"slow-log-field,omitempty"` StmtLogField string `toml:"stmt-log-field" json:"stmt-log-field,omitempty"` - Required bool `toml:"required" json:"required"` + Required bool `toml:"required" json:"required,omitempty"` } // KeyspaceObservabilityValues stores resolved metadata values. @@ -365,19 +367,7 @@ func validKeyspaceObservabilityLogFieldName(field string) bool { } func validPrometheusLabelName(label string) bool { - for i, r := range label { - if i == 0 { - if r == '_' || r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' { - continue - } - return false - } - if r == '_' || r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' || r >= '0' && r <= '9' { - continue - } - return false - } - return label != "" + return model.LabelName(label).IsValid() && model.LabelName(label).IsValidLegacy() } // ResolveKeyspaceObservability resolves configured output values from metadata. @@ -403,7 +393,7 @@ func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { resolved.StmtLogFields = append(resolved.StmtLogFields, KeyspaceObservabilityFieldPair{Key: field.StmtLogField, Value: value}) } } - c.KeyspaceObservabilityValues = resolved.Clone() + c.KeyspaceObservabilityValues = resolved return nil } diff --git a/pkg/metrics/common/wrapper.go b/pkg/metrics/common/wrapper.go index d9ab537bffb83..f668ceb7c7458 100644 --- a/pkg/metrics/common/wrapper.go +++ b/pkg/metrics/common/wrapper.go @@ -53,14 +53,6 @@ func SetConstLabels(kv ...string) { } } -// SetConstLabelsFromMap sets constant labels for metrics from a map. -func SetConstLabelsFromMap(labels map[string]string) { - constLabels = make(prometheus.Labels, len(labels)) - for k, v := range labels { - constLabels[strings.ToLower(k)] = v - } -} - // NewCounter wraps a prometheus.NewCounter. func NewCounter(opts prometheus.CounterOpts) prometheus.Counter { opts.ConstLabels = constLabels diff --git a/pkg/standby/standby.go b/pkg/standby/standby.go index 2cbedce2ee3ab..b91c054625bf9 100644 --- a/pkg/standby/standby.go +++ b/pkg/standby/standby.go @@ -48,10 +48,11 @@ const ( // ActivateRequest is the request body for activating the tidb server. type ActivateRequest struct { - KeyspaceName string `json:"keyspace_name"` - KeyspaceID *uint32 `json:"keyspace_id,omitempty"` - MaxIdleSeconds uint `json:"max_idle_seconds"` - Metadata map[string]string `json:"metadata,omitempty"` + KeyspaceName string `json:"keyspace_name"` + KeyspaceID *uint32 `json:"keyspace_id,omitempty"` + MaxIdleSeconds uint `json:"max_idle_seconds"` + // Metadata is keyspace metadata sent by the manager during activation, such as tenant, project, and cluster identifiers. + Metadata map[string]string `json:"metadata,omitempty"` // analyze table RunAutoAnalyze bool `json:"run_auto_analyze"` diff --git a/pkg/util/metricsutil/common.go b/pkg/util/metricsutil/common.go index 4a12f2dcacffb..348f9ede2be66 100644 --- a/pkg/util/metricsutil/common.go +++ b/pkg/util/metricsutil/common.go @@ -50,7 +50,7 @@ import ( var componentName = caller.Component("tidb-metrics-util") -const defaultKeyspaceLabel = "keyspace_id" +const keyspaceIDLabel = "keyspace_id" // RegisterMetrics registers metrics with keyspace metadata labels when available. func RegisterMetrics() error { @@ -118,7 +118,7 @@ func registerMetrics() error { labels := cloneConstLabels() maps.Copy(labels, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) if len(labels) > 0 { - metricscommon.SetConstLabelsFromMap(labels) + setConstLabels(labels) } initMetrics() return nil @@ -134,8 +134,16 @@ func cloneConstLabels() map[string]string { func setKeyspaceIDConstLabel(keyspaceID uint32) { labels := cloneConstLabels() - labels[defaultKeyspaceLabel] = fmt.Sprint(keyspaceID) - metricscommon.SetConstLabelsFromMap(labels) + labels[keyspaceIDLabel] = fmt.Sprint(keyspaceID) + setConstLabels(labels) +} + +func setConstLabels(labels map[string]string) { + kv := make([]string, 0, len(labels)*2) + for k, v := range labels { + kv = append(kv, k, v) + } + metricscommon.SetConstLabels(kv...) } func getKeyspaceMeta(pdCli pd.Client, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) { diff --git a/pkg/util/stmtsummary/v2/logger.go b/pkg/util/stmtsummary/v2/logger.go index f3610d06c411b..911362000568c 100644 --- a/pkg/util/stmtsummary/v2/logger.go +++ b/pkg/util/stmtsummary/v2/logger.go @@ -85,7 +85,19 @@ func marshalStmtRecord(r *StmtRecord) ([]byte, error) { if len(fields) == 0 { return json.Marshal(r) } - b, err := json.Marshal(r) + return json.Marshal(stmtRecordWithKeyspaceFields{ + StmtRecord: r, + fields: fields, + }) +} + +type stmtRecordWithKeyspaceFields struct { + *StmtRecord + fields []config.KeyspaceObservabilityFieldPair +} + +func (r stmtRecordWithKeyspaceFields) MarshalJSON() ([]byte, error) { + b, err := json.Marshal(r.StmtRecord) if err != nil { return nil, err } @@ -96,7 +108,7 @@ func marshalStmtRecord(r *StmtRecord) ([]byte, error) { if err := json.Unmarshal(b, &items); err != nil { return nil, err } - for _, field := range fields { + for _, field := range r.fields { value, err := json.Marshal(field.Value) if err != nil { return nil, err From 2d55b8236b2d6be342c3622958571427e6a56547 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 26 May 2026 14:26:25 +0800 Subject: [PATCH 13/18] standby: fix activate test lint --- pkg/standby/standby_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/standby/standby_test.go b/pkg/standby/standby_test.go index 783c6ebfa4ddb..33b6d002f4544 100644 --- a/pkg/standby/standby_test.go +++ b/pkg/standby/standby_test.go @@ -63,7 +63,6 @@ func TestActivateRequestMetadata(t *testing.T) { keyspaceID := controller.ActivationKeyspaceID() require.Equal(t, uint32(42), keyspaceID) - keyspaceID = 43 require.Equal(t, uint32(42), controller.ActivationKeyspaceID()) } From 738f86caa2c635aa02fc46e5018f7d39953308fb Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 26 May 2026 15:35:41 +0800 Subject: [PATCH 14/18] cmd/tidb-server: refine keyspace observability setup --- cmd/tidb-server/main.go | 42 ++++++++++++++++++++---------------- cmd/tidb-server/main_test.go | 32 +++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 3b80f40d59645..e987844ea6f4c 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -316,7 +316,6 @@ func main() { var standbyController server.StandbyController var activationKeyspaceID uint32 var activationMetadata map[string]string - hasActivationRequest := false if config.GetGlobalConfig().Standby.StandByMode { standbyController = standby.NewLoadKeyspaceController() } @@ -336,17 +335,14 @@ func main() { if c, ok := standbyController.(*standby.LoadKeyspaceController); ok { activationKeyspaceID = c.ActivationKeyspaceID() activationMetadata = c.ActivationMetadata() - hasActivationRequest = true } } signal.SetupUSR1Handler() err = registerStores() terror.MustNil(err) - if deploymode.IsStarter() && hasActivationRequest { - err = prepareKeyspaceObservabilityForStarter(activationKeyspaceID, activationMetadata) - terror.MustNil(err) - } + err = prepareKeyspaceObservability(activationKeyspaceID, activationMetadata) + terror.MustNil(err) err = metricsutil.RegisterMetrics() terror.MustNil(err) @@ -1163,22 +1159,34 @@ const ( keyspaceNameMetricLabel = "keyspace_name" ) -func prepareKeyspaceObservabilityForStarter(keyspaceID uint32, metadata map[string]string) error { +func prepareKeyspaceObservability(keyspaceID uint32, metadata map[string]string) error { cfg := config.GetGlobalConfig() - if cfg.Store != config.StoreTypeTiKV { + + if kerneltype.IsClassic() || cfg.Store != config.StoreTypeTiKV { return nil } - return prepareKeyspaceObservabilityWithMetadata(keyspaceID, metadata, cfg.KeyspaceName) -} - -func prepareKeyspaceObservabilityWithMetadata(keyspaceID uint32, metadata map[string]string, keyspaceName string) error { - resolvedValues := config.KeyspaceObservabilityValues{ + resolvedValues := &config.KeyspaceObservabilityValues{ MetricLabels: map[string]string{ - keyspaceNameMetricLabel: keyspaceName, + keyspaceNameMetricLabel: cfg.KeyspaceName, }, } - resolvedValues.MetricLabels[keyspaceIDMetricLabel] = fmt.Sprint(keyspaceID) + if deploymode.IsStarter() { + resolvedValues.MetricLabels[keyspaceIDMetricLabel] = fmt.Sprint(keyspaceID) + err := prepareKeyspaceObservabilityForStarter(metadata, resolvedValues) + if err != nil { + return err + } + } + + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservabilityValues = *resolvedValues + }) + + return nil +} + +func prepareKeyspaceObservabilityForStarter(metadata map[string]string, resolvedValues *config.KeyspaceObservabilityValues) error { copiedConfig := *config.GetGlobalConfig() if err := copiedConfig.ResolveKeyspaceObservability(metadata); err != nil { return err @@ -1189,10 +1197,6 @@ func prepareKeyspaceObservabilityWithMetadata(keyspaceID uint32, metadata map[st } resolvedValues.SlowLogFields = configuredValues.SlowLogFields resolvedValues.StmtLogFields = configuredValues.StmtLogFields - - config.UpdateGlobal(func(conf *config.Config) { - conf.KeyspaceObservabilityValues = resolvedValues - }) return nil } diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 5e55299b191e2..0a8eacdf31733 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -156,10 +156,24 @@ func TestSetVersionByConfigNormalizeLegacyPlaceholderForNextGen(t *testing.T) { } func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { + if kerneltype.IsClassic() { + t.Skip("only for nextgen kernel") + } restore := config.RestoreFunc() defer restore() - err := prepareKeyspaceObservabilityWithMetadata(42, nil, "ks") + originalMode := deploymode.Get() + t.Cleanup(func() { + require.NoError(t, deploymode.Set(originalMode)) + }) + require.NoError(t, deploymode.Set(deploymode.Starter)) + + keyspaceID := uint32(42) + config.UpdateGlobal(func(conf *config.Config) { + conf.Store = config.StoreTypeTiKV + conf.KeyspaceName = "ks" + }) + err := prepareKeyspaceObservability(keyspaceID, nil) require.NoError(t, err) require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks"}, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) @@ -175,9 +189,9 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { } }) - err = prepareKeyspaceObservabilityWithMetadata(42, map[string]string{ + err = prepareKeyspaceObservability(keyspaceID, map[string]string{ "meta_a": "value_a", - }, "ks") + }) require.NoError(t, err) cfg := config.GetGlobalConfig() @@ -187,14 +201,24 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { } func TestSetupKeyspaceObservabilityForStarterSkipsNonTiKV(t *testing.T) { + if kerneltype.IsClassic() { + t.Skip("only for nextgen kernel") + } restore := config.RestoreFunc() defer restore() + originalMode := deploymode.Get() + t.Cleanup(func() { + require.NoError(t, deploymode.Set(originalMode)) + }) + require.NoError(t, deploymode.Set(deploymode.Starter)) + config.UpdateGlobal(func(conf *config.Config) { conf.Store = config.StoreTypeUniStore conf.Path = "invalid-pd-path" conf.KeyspaceName = "test_keyspace" }) - require.NoError(t, prepareKeyspaceObservabilityForStarter(42, nil)) + keyspaceID := uint32(42) + require.NoError(t, prepareKeyspaceObservability(keyspaceID, nil)) require.Empty(t, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) } From e4c2ab49583fc9ef7df607d0d53147c087116fc7 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 26 May 2026 16:56:34 +0800 Subject: [PATCH 15/18] config: simplify keyspace observability fields --- cmd/tidb-server/main_test.go | 4 +-- pkg/config/config_test.go | 4 +-- pkg/config/keyspace_observability.go | 38 ++++++++++++++++------------ pkg/sessionctx/variable/slow_log.go | 8 ++++-- pkg/util/stmtsummary/v2/logger.go | 27 +++----------------- 5 files changed, 36 insertions(+), 45 deletions(-) diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 0a8eacdf31733..53762625aff9f 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -196,8 +196,8 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { cfg := config.GetGlobalConfig() require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks", "keyspace_meta_label_a": "value_a"}, cfg.GetKeyspaceObservabilityMetricLabels()) - require.Equal(t, []config.KeyspaceObservabilityFieldPair{{Key: "Slow_meta_a", Value: "value_a"}}, cfg.GetKeyspaceObservabilitySlowLogFields()) - require.Equal(t, []config.KeyspaceObservabilityFieldPair{{Key: "stmt_meta_a", Value: "value_a"}}, cfg.GetKeyspaceObservabilityStmtLogFields()) + require.Equal(t, map[string]string{"Slow_meta_a": "value_a"}, cfg.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, cfg.GetKeyspaceObservabilityStmtLogFields()) } func TestSetupKeyspaceObservabilityForStarterSkipsNonTiKV(t *testing.T) { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 16adb20ccbc0c..ea7541e61aab8 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -192,8 +192,8 @@ metric-label = "keyspace_meta_label_b" "meta_b": "value_b", })) require.Equal(t, map[string]string{"keyspace_meta_label_a": "value_a", "keyspace_meta_label_b": "value_b"}, conf.GetKeyspaceObservabilityMetricLabels()) - require.Equal(t, []KeyspaceObservabilityFieldPair{{Key: "Slow_meta_a", Value: "value_a"}}, conf.GetKeyspaceObservabilitySlowLogFields()) - require.Equal(t, []KeyspaceObservabilityFieldPair{{Key: "stmt_meta_a", Value: "value_a"}}, conf.GetKeyspaceObservabilityStmtLogFields()) + require.Equal(t, map[string]string{"Slow_meta_a": "value_a"}, conf.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, conf.GetKeyspaceObservabilityStmtLogFields()) metricLabels := conf.GetKeyspaceObservabilityMetricLabels() metricLabels["keyspace_meta_label_a"] = "changed" diff --git a/pkg/config/keyspace_observability.go b/pkg/config/keyspace_observability.go index e2c32cd2b647f..73be3771d0fc4 100644 --- a/pkg/config/keyspace_observability.go +++ b/pkg/config/keyspace_observability.go @@ -37,15 +37,9 @@ type KeyspaceObservabilityField struct { // KeyspaceObservabilityValues stores resolved metadata values. type KeyspaceObservabilityValues struct { - MetricLabels map[string]string `toml:"-" json:"-"` - SlowLogFields []KeyspaceObservabilityFieldPair `toml:"-" json:"-"` - StmtLogFields []KeyspaceObservabilityFieldPair `toml:"-" json:"-"` -} - -// KeyspaceObservabilityFieldPair stores one resolved output field. -type KeyspaceObservabilityFieldPair struct { - Key string - Value string + MetricLabels map[string]string `toml:"-" json:"-"` + SlowLogFields map[string]string `toml:"-" json:"-"` + StmtLogFields map[string]string `toml:"-" json:"-"` } const keyspaceObservabilityMetricLabelPrefix = "keyspace_meta_" @@ -373,7 +367,9 @@ func validPrometheusLabelName(label string) bool { // ResolveKeyspaceObservability resolves configured output values from metadata. func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { resolved := KeyspaceObservabilityValues{ - MetricLabels: make(map[string]string), + MetricLabels: make(map[string]string), + SlowLogFields: make(map[string]string), + StmtLogFields: make(map[string]string), } for _, field := range c.KeyspaceObservability.Fields { value, ok := values[field.Source] @@ -387,10 +383,10 @@ func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { resolved.MetricLabels[field.MetricLabel] = value } if field.SlowLogField != "" { - resolved.SlowLogFields = append(resolved.SlowLogFields, KeyspaceObservabilityFieldPair{Key: field.SlowLogField, Value: value}) + resolved.SlowLogFields[field.SlowLogField] = value } if field.StmtLogField != "" { - resolved.StmtLogFields = append(resolved.StmtLogFields, KeyspaceObservabilityFieldPair{Key: field.StmtLogField, Value: value}) + resolved.StmtLogFields[field.StmtLogField] = value } } c.KeyspaceObservabilityValues = resolved @@ -406,8 +402,18 @@ func (v KeyspaceObservabilityValues) Clone() KeyspaceObservabilityValues { res.MetricLabels[k] = value } } - res.SlowLogFields = append([]KeyspaceObservabilityFieldPair(nil), v.SlowLogFields...) - res.StmtLogFields = append([]KeyspaceObservabilityFieldPair(nil), v.StmtLogFields...) + if len(v.SlowLogFields) > 0 { + res.SlowLogFields = make(map[string]string, len(v.SlowLogFields)) + for k, value := range v.SlowLogFields { + res.SlowLogFields[k] = value + } + } + if len(v.StmtLogFields) > 0 { + res.StmtLogFields = make(map[string]string, len(v.StmtLogFields)) + for k, value := range v.StmtLogFields { + res.StmtLogFields[k] = value + } + } return res } @@ -417,11 +423,11 @@ func (c *Config) GetKeyspaceObservabilityMetricLabels() map[string]string { } // GetKeyspaceObservabilitySlowLogFields returns resolved slow log fields. -func (c *Config) GetKeyspaceObservabilitySlowLogFields() []KeyspaceObservabilityFieldPair { +func (c *Config) GetKeyspaceObservabilitySlowLogFields() map[string]string { return c.KeyspaceObservabilityValues.Clone().SlowLogFields } // GetKeyspaceObservabilityStmtLogFields returns resolved statement log fields. -func (c *Config) GetKeyspaceObservabilityStmtLogFields() []KeyspaceObservabilityFieldPair { +func (c *Config) GetKeyspaceObservabilityStmtLogFields() map[string]string { return c.KeyspaceObservabilityValues.Clone().StmtLogFields } diff --git a/pkg/sessionctx/variable/slow_log.go b/pkg/sessionctx/variable/slow_log.go index c4391885c6857..873a11b27506f 100644 --- a/pkg/sessionctx/variable/slow_log.go +++ b/pkg/sessionctx/variable/slow_log.go @@ -20,6 +20,7 @@ import ( "encoding/json" "fmt" "hash/crc64" + "maps" "math" "regexp" "slices" @@ -590,8 +591,11 @@ func (s *SessionVars) SlowLogFormat(logItems *SlowQueryLogItems) string { if logItems.PrevStmt != "" { writeSlowLogItem(&buf, SlowLogPrevStmt, logItems.PrevStmt) } - for _, field := range config.GetGlobalConfig().GetKeyspaceObservabilitySlowLogFields() { - writeSlowLogItem(&buf, field.Key, field.Value) + keyspaceFields := config.GetGlobalConfig().GetKeyspaceObservabilitySlowLogFields() + keyspaceFieldKeys := slices.Collect(maps.Keys(keyspaceFields)) + slices.Sort(keyspaceFieldKeys) + for _, key := range keyspaceFieldKeys { + writeSlowLogItem(&buf, key, keyspaceFields[key]) } if s.CurrentDBChanged { diff --git a/pkg/util/stmtsummary/v2/logger.go b/pkg/util/stmtsummary/v2/logger.go index 911362000568c..50ec8a1b386e5 100644 --- a/pkg/util/stmtsummary/v2/logger.go +++ b/pkg/util/stmtsummary/v2/logger.go @@ -85,35 +85,16 @@ func marshalStmtRecord(r *StmtRecord) ([]byte, error) { if len(fields) == 0 { return json.Marshal(r) } - return json.Marshal(stmtRecordWithKeyspaceFields{ - StmtRecord: r, - fields: fields, - }) -} - -type stmtRecordWithKeyspaceFields struct { - *StmtRecord - fields []config.KeyspaceObservabilityFieldPair -} - -func (r stmtRecordWithKeyspaceFields) MarshalJSON() ([]byte, error) { - b, err := json.Marshal(r.StmtRecord) + b, err := json.Marshal(r) if err != nil { return nil, err } - if !json.Valid(b) || len(b) < 2 || b[0] != '{' || b[len(b)-1] != '}' { - return b, nil - } - items := make(map[string]json.RawMessage) + items := make(map[string]any) if err := json.Unmarshal(b, &items); err != nil { return nil, err } - for _, field := range r.fields { - value, err := json.Marshal(field.Value) - if err != nil { - return nil, err - } - items[field.Key] = value + for key, value := range fields { + items[key] = value } return json.Marshal(items) } From 88bfdf790346412cb1a2a01d7834a005bb068215 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 26 May 2026 17:57:54 +0800 Subject: [PATCH 16/18] config: refine keyspace observability fields --- cmd/tidb-server/main.go | 5 +- pkg/config/config_test.go | 13 --- pkg/config/keyspace_observability.go | 153 ++----------------------- pkg/util/stmtsummary/v2/logger.go | 21 ++-- pkg/util/stmtsummary/v2/record_test.go | 2 +- 5 files changed, 19 insertions(+), 175 deletions(-) diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index e987844ea6f4c..6e5452d406336 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -19,6 +19,7 @@ import ( "flag" "fmt" "io/fs" + "maps" "os" "runtime" "strconv" @@ -1192,9 +1193,7 @@ func prepareKeyspaceObservabilityForStarter(metadata map[string]string, resolved return err } configuredValues := copiedConfig.KeyspaceObservabilityValues.Clone() - for k, v := range configuredValues.MetricLabels { - resolvedValues.MetricLabels[k] = v - } + maps.Copy(resolvedValues.MetricLabels, configuredValues.MetricLabels) resolvedValues.SlowLogFields = configuredValues.SlowLogFields resolvedValues.StmtLogFields = configuredValues.StmtLogFields return nil diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index ea7541e61aab8..a512f6479ad4c 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -195,10 +195,6 @@ metric-label = "keyspace_meta_label_b" require.Equal(t, map[string]string{"Slow_meta_a": "value_a"}, conf.GetKeyspaceObservabilitySlowLogFields()) require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, conf.GetKeyspaceObservabilityStmtLogFields()) - metricLabels := conf.GetKeyspaceObservabilityMetricLabels() - metricLabels["keyspace_meta_label_a"] = "changed" - require.Equal(t, "value_a", conf.GetKeyspaceObservabilityMetricLabels()["keyspace_meta_label_a"]) - require.ErrorContains(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_b": "value_b"}), `missing required keyspace metadata entry "meta_a"`) } @@ -323,15 +319,6 @@ slow-log-field = "Slow_meta" `, err: `duplicated slow-log-field "Slow_meta"`, }, - { - name: "reserved stmt log field", - content: ` -[[keyspace-observability.fields]] -source = "meta_a" -stmt-log-field = "digest" -`, - err: `reserved stmt-log-field "digest"`, - }, { name: "duplicate stmt log field", content: ` diff --git a/pkg/config/keyspace_observability.go b/pkg/config/keyspace_observability.go index 73be3771d0fc4..b93272eae0cb5 100644 --- a/pkg/config/keyspace_observability.go +++ b/pkg/config/keyspace_observability.go @@ -16,6 +16,7 @@ package config import ( "fmt" + "maps" "strings" "github.com/prometheus/common/model" @@ -164,134 +165,6 @@ var reservedKeyspaceObservabilitySlowLogFieldPrefixes = []string{ "cop_backoff_", } -var reservedKeyspaceObservabilityStmtLogFields = map[string]struct{}{ - "auth_users": {}, - "backoff_types": {}, - "begin": {}, - "binding_digest": {}, - "binding_sql": {}, - "charset": {}, - "collation": {}, - "commit_count": {}, - "digest": {}, - "end": {}, - "exec_count": {}, - "exec_retry_count": {}, - "exec_retry_time": {}, - "first_seen": {}, - "index_names": {}, - "is_internal": {}, - "keyspace_id": {}, - "keyspace_name": {}, - "last_seen": {}, - "max_backoff_time": {}, - "max_commit_backoff_time": {}, - "max_commit_time": {}, - "max_compile_latency": {}, - "max_cop_process_address": {}, - "max_cop_process_time": {}, - "max_cop_wait_address": {}, - "max_cop_wait_time": {}, - "max_disk": {}, - "max_get_commit_ts_time": {}, - "max_latency": {}, - "max_local_latch_time": {}, - "max_mem": {}, - "max_mem_arbitration": {}, - "max_parse_latency": {}, - "max_prewrite_region_num": {}, - "max_prewrite_time": {}, - "max_process_time": {}, - "max_processed_keys": {}, - "max_resolve_lock_time": {}, - "max_result_rows": {}, - "max_rocksdb_block_cache_hit_count": {}, - "max_rocksdb_block_read_byte": {}, - "max_rocksdb_block_read_count": {}, - "max_rocksdb_delete_skipped_count": {}, - "max_rocksdb_key_skipped_count": {}, - "max_rru": {}, - "max_ru_wait_duration": {}, - "max_ruv2": {}, - "max_total_keys": {}, - "max_txn_retry": {}, - "max_wait_time": {}, - "max_write_keys": {}, - "max_write_size": {}, - "max_wru": {}, - "min_latency": {}, - "min_result_rows": {}, - "normalized_sql": {}, - "plan_cache_hits": {}, - "plan_cache_unqualified_count": {}, - "plan_cache_unqualified_last_reason": {}, - "plan_digest": {}, - "plan_hint": {}, - "plan_in_binding": {}, - "plan_in_cache": {}, - "prepared": {}, - "prev_sql": {}, - "resource_group_name": {}, - "sample_binary_plan": {}, - "sample_plan": {}, - "sample_sql": {}, - "schema_name": {}, - "stmt_type": {}, - "storage_kv": {}, - "storage_mpp": {}, - "sum_affected_rows": {}, - "sum_backoff_time": {}, - "sum_backoff_times": {}, - "sum_backoff_total": {}, - "sum_commit_backoff_time": {}, - "sum_commit_time": {}, - "sum_compile_latency": {}, - "sum_disk": {}, - "sum_errors": {}, - "sum_get_commit_ts_time": {}, - "sum_kv_total": {}, - "sum_latency": {}, - "sum_local_latch_time": {}, - "sum_mem": {}, - "sum_mem_arbitration": {}, - "sum_num_cop_tasks": {}, - "sum_parse_latency": {}, - "sum_pd_total": {}, - "sum_prewrite_region_num": {}, - "sum_prewrite_time": {}, - "sum_process_time": {}, - "sum_processed_keys": {}, - "sum_resolve_lock_time": {}, - "sum_result_rows": {}, - "sum_rocksdb_block_cache_hit_count": {}, - "sum_rocksdb_block_read_byte": {}, - "sum_rocksdb_block_read_count": {}, - "sum_rocksdb_delete_skipped_count": {}, - "sum_rocksdb_key_skipped_count": {}, - "sum_rru": {}, - "sum_ru_wait_duration": {}, - "sum_ruv2": {}, - "sum_tidb_cpu": {}, - "sum_tikv_cpu": {}, - "sum_total_keys": {}, - "sum_txn_retry": {}, - "sum_wait_time": {}, - "sum_warnings": {}, - "sum_write_keys": {}, - "sum_write_size": {}, - "sum_write_sql_resp_total": {}, - "sum_wru": {}, - "table_names": {}, - "unpacked_bytes_received_tiflash_cross_zone": {}, - "unpacked_bytes_received_tiflash_total": {}, - "unpacked_bytes_received_tikv_cross_zone": {}, - "unpacked_bytes_received_tikv_total": {}, - "unpacked_bytes_send_tiflash_cross_zone": {}, - "unpacked_bytes_send_tiflash_total": {}, - "unpacked_bytes_send_tikv_cross_zone": {}, - "unpacked_bytes_send_tikv_total": {}, -} - // Valid validates metadata observability mappings. func (o KeyspaceObservability) Valid() error { metricLabels := make(map[string]struct{}, len(o.Fields)) @@ -332,9 +205,6 @@ func (o KeyspaceObservability) Valid() error { } if field.StmtLogField != "" { key := strings.ToLower(field.StmtLogField) - if _, ok := reservedKeyspaceObservabilityStmtLogFields[key]; ok { - return fmt.Errorf("[keyspace-observability.fields.%d] reserved stmt-log-field %q", i, field.StmtLogField) - } if _, ok := stmtLogFields[key]; ok { return fmt.Errorf("[keyspace-observability.fields.%d] duplicated stmt-log-field %q", i, field.StmtLogField) } @@ -397,37 +267,28 @@ func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { func (v KeyspaceObservabilityValues) Clone() KeyspaceObservabilityValues { res := KeyspaceObservabilityValues{} if len(v.MetricLabels) > 0 { - res.MetricLabels = make(map[string]string, len(v.MetricLabels)) - for k, value := range v.MetricLabels { - res.MetricLabels[k] = value - } + res.MetricLabels = maps.Clone(v.MetricLabels) } if len(v.SlowLogFields) > 0 { - res.SlowLogFields = make(map[string]string, len(v.SlowLogFields)) - for k, value := range v.SlowLogFields { - res.SlowLogFields[k] = value - } + res.SlowLogFields = maps.Clone(v.SlowLogFields) } if len(v.StmtLogFields) > 0 { - res.StmtLogFields = make(map[string]string, len(v.StmtLogFields)) - for k, value := range v.StmtLogFields { - res.StmtLogFields[k] = value - } + res.StmtLogFields = maps.Clone(v.StmtLogFields) } return res } // GetKeyspaceObservabilityMetricLabels returns resolved metric labels. func (c *Config) GetKeyspaceObservabilityMetricLabels() map[string]string { - return c.KeyspaceObservabilityValues.Clone().MetricLabels + return c.KeyspaceObservabilityValues.MetricLabels } // GetKeyspaceObservabilitySlowLogFields returns resolved slow log fields. func (c *Config) GetKeyspaceObservabilitySlowLogFields() map[string]string { - return c.KeyspaceObservabilityValues.Clone().SlowLogFields + return c.KeyspaceObservabilityValues.SlowLogFields } // GetKeyspaceObservabilityStmtLogFields returns resolved statement log fields. func (c *Config) GetKeyspaceObservabilityStmtLogFields() map[string]string { - return c.KeyspaceObservabilityValues.Clone().StmtLogFields + return c.KeyspaceObservabilityValues.StmtLogFields } diff --git a/pkg/util/stmtsummary/v2/logger.go b/pkg/util/stmtsummary/v2/logger.go index 50ec8a1b386e5..629828d8e0307 100644 --- a/pkg/util/stmtsummary/v2/logger.go +++ b/pkg/util/stmtsummary/v2/logger.go @@ -85,18 +85,15 @@ func marshalStmtRecord(r *StmtRecord) ([]byte, error) { if len(fields) == 0 { return json.Marshal(r) } - b, err := json.Marshal(r) - if err != nil { - return nil, err - } - items := make(map[string]any) - if err := json.Unmarshal(b, &items); err != nil { - return nil, err - } - for key, value := range fields { - items[key] = value - } - return json.Marshal(items) + return json.Marshal(stmtRecordWithAdditionalFields{ + StmtRecord: r, + AdditionalFields: fields, + }) +} + +type stmtRecordWithAdditionalFields struct { + *StmtRecord + AdditionalFields map[string]string `json:"additional_fields"` } type stmtLogEncoder struct{} diff --git a/pkg/util/stmtsummary/v2/record_test.go b/pkg/util/stmtsummary/v2/record_test.go index 9b3700497b4f9..8fcfc88f179d2 100644 --- a/pkg/util/stmtsummary/v2/record_test.go +++ b/pkg/util/stmtsummary/v2/record_test.go @@ -101,6 +101,6 @@ func TestStmtRecord(t *testing.T) { require.NoError(t, err) items := make(map[string]any) require.NoError(t, json.Unmarshal(b, &items)) - require.Equal(t, "value_a", items["stmt_meta_a"]) + require.Equal(t, map[string]any{"stmt_meta_a": "value_a"}, items["additional_fields"]) require.Equal(t, record2.Digest, items["digest"]) } From 39ce9fa0b410bab32de954d30f6a73cf468b8eb5 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 26 May 2026 18:43:16 +0800 Subject: [PATCH 17/18] standby: remove keyspace id from activate request --- cmd/tidb-server/main.go | 37 ++--- cmd/tidb-server/main_test.go | 16 +-- pkg/config/config.toml.nextgen.example | 4 +- pkg/config/config_test.go | 34 ++--- pkg/config/keyspace_observability.go | 136 +----------------- pkg/sessionctx/variable/tests/session_test.go | 4 +- pkg/standby/standby.go | 17 +-- pkg/standby/standby_test.go | 53 +------ 8 files changed, 49 insertions(+), 252 deletions(-) diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 6e5452d406336..43a886d31e67e 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -315,7 +315,6 @@ func main() { } var standbyController server.StandbyController - var activationKeyspaceID uint32 var activationMetadata map[string]string if config.GetGlobalConfig().Standby.StandByMode { standbyController = standby.NewLoadKeyspaceController() @@ -334,7 +333,6 @@ func main() { // need to validate config again in case of config change via standby terror.MustNil(config.GetGlobalConfig().Valid()) if c, ok := standbyController.(*standby.LoadKeyspaceController); ok { - activationKeyspaceID = c.ActivationKeyspaceID() activationMetadata = c.ActivationMetadata() } } @@ -342,8 +340,10 @@ func main() { signal.SetupUSR1Handler() err = registerStores() terror.MustNil(err) - err = prepareKeyspaceObservability(activationKeyspaceID, activationMetadata) - terror.MustNil(err) + if deploymode.IsStarter() { + err = prepareKeyspaceObservabilityForStarter(activationMetadata) + terror.MustNil(err) + } err = metricsutil.RegisterMetrics() terror.MustNil(err) @@ -1156,38 +1156,22 @@ func closeStmtSummary() { } const ( - keyspaceIDMetricLabel = "keyspace_id" keyspaceNameMetricLabel = "keyspace_name" ) -func prepareKeyspaceObservability(keyspaceID uint32, metadata map[string]string) error { +func prepareKeyspaceObservabilityForStarter(metadata map[string]string) error { cfg := config.GetGlobalConfig() - if kerneltype.IsClassic() || cfg.Store != config.StoreTypeTiKV { + if cfg.Store != config.StoreTypeTiKV { return nil } - resolvedValues := &config.KeyspaceObservabilityValues{ + + resolvedValues := config.KeyspaceObservabilityValues{ MetricLabels: map[string]string{ keyspaceNameMetricLabel: cfg.KeyspaceName, }, } - if deploymode.IsStarter() { - resolvedValues.MetricLabels[keyspaceIDMetricLabel] = fmt.Sprint(keyspaceID) - err := prepareKeyspaceObservabilityForStarter(metadata, resolvedValues) - if err != nil { - return err - } - } - - config.UpdateGlobal(func(conf *config.Config) { - conf.KeyspaceObservabilityValues = *resolvedValues - }) - - return nil -} - -func prepareKeyspaceObservabilityForStarter(metadata map[string]string, resolvedValues *config.KeyspaceObservabilityValues) error { copiedConfig := *config.GetGlobalConfig() if err := copiedConfig.ResolveKeyspaceObservability(metadata); err != nil { return err @@ -1196,6 +1180,11 @@ func prepareKeyspaceObservabilityForStarter(metadata map[string]string, resolved maps.Copy(resolvedValues.MetricLabels, configuredValues.MetricLabels) resolvedValues.SlowLogFields = configuredValues.SlowLogFields resolvedValues.StmtLogFields = configuredValues.StmtLogFields + + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservabilityValues = resolvedValues + }) + return nil } diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 53762625aff9f..c761026e62eb7 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -168,35 +168,34 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { }) require.NoError(t, deploymode.Set(deploymode.Starter)) - keyspaceID := uint32(42) config.UpdateGlobal(func(conf *config.Config) { conf.Store = config.StoreTypeTiKV conf.KeyspaceName = "ks" }) - err := prepareKeyspaceObservability(keyspaceID, nil) + err := prepareKeyspaceObservabilityForStarter(nil) require.NoError(t, err) - require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks"}, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) + require.Equal(t, map[string]string{"keyspace_name": "ks"}, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) config.UpdateGlobal(func(conf *config.Config) { conf.KeyspaceObservability = config.KeyspaceObservability{ Fields: []config.KeyspaceObservabilityField{{ Source: "meta_a", MetricLabel: "keyspace_meta_label_a", - SlowLogField: "Slow_meta_a", + SlowLogField: "keyspace_meta_slow_a", StmtLogField: "stmt_meta_a", Required: true, }}, } }) - err = prepareKeyspaceObservability(keyspaceID, map[string]string{ + err = prepareKeyspaceObservabilityForStarter(map[string]string{ "meta_a": "value_a", }) require.NoError(t, err) cfg := config.GetGlobalConfig() - require.Equal(t, map[string]string{"keyspace_id": "42", "keyspace_name": "ks", "keyspace_meta_label_a": "value_a"}, cfg.GetKeyspaceObservabilityMetricLabels()) - require.Equal(t, map[string]string{"Slow_meta_a": "value_a"}, cfg.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, map[string]string{"keyspace_name": "ks", "keyspace_meta_label_a": "value_a"}, cfg.GetKeyspaceObservabilityMetricLabels()) + require.Equal(t, map[string]string{"keyspace_meta_slow_a": "value_a"}, cfg.GetKeyspaceObservabilitySlowLogFields()) require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, cfg.GetKeyspaceObservabilityStmtLogFields()) } @@ -218,7 +217,6 @@ func TestSetupKeyspaceObservabilityForStarterSkipsNonTiKV(t *testing.T) { conf.KeyspaceName = "test_keyspace" }) - keyspaceID := uint32(42) - require.NoError(t, prepareKeyspaceObservability(keyspaceID, nil)) + require.NoError(t, prepareKeyspaceObservabilityForStarter(nil)) require.Empty(t, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) } diff --git a/pkg/config/config.toml.nextgen.example b/pkg/config/config.toml.nextgen.example index a04d6a19c0c39..39a2ea85a39dd 100644 --- a/pkg/config/config.toml.nextgen.example +++ b/pkg/config/config.toml.nextgen.example @@ -450,8 +450,8 @@ engines = ["tikv", "tiflash", "tidb"] # Only valid when deploy-mode is starter. # [[keyspace-observability.fields]] # source = "meta_key" -# metric-label = "metric_label" -# slow-log-field = "Slow_log_field" +# metric-label = "keyspace_meta_metric_label" +# slow-log-field = "keyspace_meta_slow_log_field" # stmt-log-field = "stmt_log_field" # required = false diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index a512f6479ad4c..86d1561eae873 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -176,7 +176,7 @@ func TestKeyspaceObservability(t *testing.T) { [[keyspace-observability.fields]] source = "meta_a" metric-label = "keyspace_meta_label_a" -slow-log-field = "Slow_meta_a" +slow-log-field = "keyspace_meta_slow_a" stmt-log-field = "stmt_meta_a" required = true @@ -192,7 +192,7 @@ metric-label = "keyspace_meta_label_b" "meta_b": "value_b", })) require.Equal(t, map[string]string{"keyspace_meta_label_a": "value_a", "keyspace_meta_label_b": "value_b"}, conf.GetKeyspaceObservabilityMetricLabels()) - require.Equal(t, map[string]string{"Slow_meta_a": "value_a"}, conf.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, map[string]string{"keyspace_meta_slow_a": "value_a"}, conf.GetKeyspaceObservabilitySlowLogFields()) require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, conf.GetKeyspaceObservabilityStmtLogFields()) require.ErrorContains(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_b": "value_b"}), `missing required keyspace metadata entry "meta_a"`) @@ -289,13 +289,13 @@ metric-label = "task_id" err: `metric-label "task_id" must start with "keyspace_meta_"`, }, { - name: "reserved slow log field", + name: "slow log field without prefix", content: ` -[[keyspace-observability.fields]] -source = "meta_a" -slow-log-field = "Digest" -`, - err: `reserved slow-log-field "Digest"`, + [[keyspace-observability.fields]] + source = "meta_a" + slow-log-field = "Digest" + `, + err: `slow-log-field "Digest" must start with "keyspace_meta_"`, }, { name: "invalid slow log field", @@ -309,15 +309,15 @@ slow-log-field = "Bad Field" { name: "duplicate slow log field", content: ` -[[keyspace-observability.fields]] -source = "meta_a" -slow-log-field = "Slow_meta" - -[[keyspace-observability.fields]] -source = "meta_b" -slow-log-field = "Slow_meta" -`, - err: `duplicated slow-log-field "Slow_meta"`, + [[keyspace-observability.fields]] + source = "meta_a" + slow-log-field = "keyspace_meta_slow" + + [[keyspace-observability.fields]] + source = "meta_b" + slow-log-field = "KEYSPACE_META_SLOW" + `, + err: `duplicated slow-log-field "KEYSPACE_META_SLOW"`, }, { name: "duplicate stmt log field", diff --git a/pkg/config/keyspace_observability.go b/pkg/config/keyspace_observability.go index b93272eae0cb5..69531f00836ef 100644 --- a/pkg/config/keyspace_observability.go +++ b/pkg/config/keyspace_observability.go @@ -45,126 +45,6 @@ type KeyspaceObservabilityValues struct { const keyspaceObservabilityMetricLabelPrefix = "keyspace_meta_" -var reservedKeyspaceObservabilitySlowLogFields = map[string]struct{}{ - "backoff_detail": {}, - "backoff_time": {}, - "backoff_total": {}, - "backoff_types": {}, - "binary_plan": {}, - "commit_backoff_time": {}, - "commit_primary_rpc_detail": {}, - "commit_time": {}, - "compile_time": {}, - "conn_id": {}, - "cop_backoff_": {}, - "cop_mvcc_read_amplification": {}, - "cop_proc_addr": {}, - "cop_proc_avg": {}, - "cop_proc_max": {}, - "cop_proc_p90": {}, - "cop_time": {}, - "cop_wait_addr": {}, - "cop_wait_avg": {}, - "cop_wait_max": {}, - "cop_wait_p90": {}, - "db": {}, - "digest": {}, - "disk_max": {}, - "exec_retry_count": {}, - "exec_retry_time": {}, - "get_commit_ts_time": {}, - "get_latest_ts_time": {}, - "get_snapshot_time": {}, - "has_more_results": {}, - "host": {}, - "index_names": {}, - "is_internal": {}, - "isexplicittxn": {}, - "issyncstatsfailed": {}, - "iswritecachetable": {}, - "keyspace_id": {}, - "keyspace_name": {}, - "kv_total": {}, - "local_latch_wait_time": {}, - "lockkeys_time": {}, - "mem_arbitration": {}, - "mem_max": {}, - "num_cop_tasks": {}, - "opt_binding_match": {}, - "opt_logical": {}, - "opt_physical": {}, - "opt_stats_derive": {}, - "opt_stats_sync_wait": {}, - "optimize_time": {}, - "parse_time": {}, - "pd_total": {}, - "plan": {}, - "plan_digest": {}, - "plan_from_binding": {}, - "plan_from_cache": {}, - "preproc_subqueries": {}, - "preproc_subqueries_time": {}, - "prepared": {}, - "prewrite_backoff_types": {}, - "prewrite_region": {}, - "prewrite_time": {}, - "prev_stmt": {}, - "process_keys": {}, - "process_time": {}, - "query": {}, - "query_time": {}, - "request_count": {}, - "request_unit_read": {}, - "request_unit_v2": {}, - "request_unit_v2_detail": {}, - "request_unit_write": {}, - "resolve_lock_time": {}, - "resource_group": {}, - "result_rows": {}, - "rewrite_time": {}, - "rocksdb_block_cache_hit_count": {}, - "rocksdb_block_read_byte": {}, - "rocksdb_block_read_count": {}, - "rocksdb_block_read_time": {}, - "rocksdb_delete_skipped_count": {}, - "rocksdb_key_skipped_count": {}, - "session_alias": {}, - "session_connect_attrs": {}, - "slowest_prewrite_rpc_detail": {}, - "stats": {}, - "storage_from_kv": {}, - "storage_from_mpp": {}, - "succ": {}, - "tidb_cpu_time": {}, - "tikv_cpu_time": {}, - "time": {}, - "time_queued_by_rc": {}, - "total_keys": {}, - "txn_retry": {}, - "txn_start_ts": {}, - "unpacked_bytes_received_tiflash_cross_zone": {}, - "unpacked_bytes_received_tiflash_total": {}, - "unpacked_bytes_received_tikv_cross_zone": {}, - "unpacked_bytes_received_tikv_total": {}, - "unpacked_bytes_sent_tiflash_cross_zone": {}, - "unpacked_bytes_sent_tiflash_total": {}, - "unpacked_bytes_sent_tikv_cross_zone": {}, - "unpacked_bytes_sent_tikv_total": {}, - "user": {}, - "user@host": {}, - "wait_prewrite_binlog_time": {}, - "wait_time": {}, - "wait_ts": {}, - "warnings": {}, - "write_keys": {}, - "write_size": {}, - "write_sql_response_total": {}, -} - -var reservedKeyspaceObservabilitySlowLogFieldPrefixes = []string{ - "cop_backoff_", -} - // Valid validates metadata observability mappings. func (o KeyspaceObservability) Valid() error { metricLabels := make(map[string]struct{}, len(o.Fields)) @@ -195,8 +75,8 @@ func (o KeyspaceObservability) Valid() error { return fmt.Errorf("[keyspace-observability.fields.%d] invalid slow-log-field %q", i, field.SlowLogField) } key := strings.ToLower(field.SlowLogField) - if isReservedKeyspaceObservabilitySlowLogField(key) { - return fmt.Errorf("[keyspace-observability.fields.%d] reserved slow-log-field %q", i, field.SlowLogField) + if !strings.HasPrefix(key, keyspaceObservabilityMetricLabelPrefix) { + return fmt.Errorf("[keyspace-observability.fields.%d] slow-log-field %q must start with %q", i, field.SlowLogField, keyspaceObservabilityMetricLabelPrefix) } if _, ok := slowLogFields[key]; ok { return fmt.Errorf("[keyspace-observability.fields.%d] duplicated slow-log-field %q", i, field.SlowLogField) @@ -214,18 +94,6 @@ func (o KeyspaceObservability) Valid() error { return nil } -func isReservedKeyspaceObservabilitySlowLogField(field string) bool { - if _, ok := reservedKeyspaceObservabilitySlowLogFields[field]; ok { - return true - } - for _, prefix := range reservedKeyspaceObservabilitySlowLogFieldPrefixes { - if strings.HasPrefix(field, prefix) { - return true - } - } - return false -} - func validKeyspaceObservabilityLogFieldName(field string) bool { return validPrometheusLabelName(field) } diff --git a/pkg/sessionctx/variable/tests/session_test.go b/pkg/sessionctx/variable/tests/session_test.go index a77c4ed466e15..9266dcde900b0 100644 --- a/pkg/sessionctx/variable/tests/session_test.go +++ b/pkg/sessionctx/variable/tests/session_test.go @@ -391,13 +391,13 @@ func TestSlowLogFormat(t *testing.T) { conf.KeyspaceObservability = config.KeyspaceObservability{ Fields: []config.KeyspaceObservabilityField{{ Source: "meta_a", - SlowLogField: "Slow_meta_a", + SlowLogField: "keyspace_meta_slow_a", }}, } require.NoError(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_a": "value_a"})) }) logString = seVar.SlowLogFormat(logItems) - require.Equal(t, resultFields+"\n"+"# Slow_meta_a: value_a\n"+sql, logString) + require.Equal(t, resultFields+"\n"+"# keyspace_meta_slow_a: value_a\n"+sql, logString) // test PrepareSlowLogItemsForRules and CompleteSlowLogItemsForRules seVar.SlowLogRules = slowlogrule.NewSessionSlowLogRules(&slowlogrule.SlowLogRules{ diff --git a/pkg/standby/standby.go b/pkg/standby/standby.go index b91c054625bf9..91f0ce4c7753a 100644 --- a/pkg/standby/standby.go +++ b/pkg/standby/standby.go @@ -48,9 +48,8 @@ const ( // ActivateRequest is the request body for activating the tidb server. type ActivateRequest struct { - KeyspaceName string `json:"keyspace_name"` - KeyspaceID *uint32 `json:"keyspace_id,omitempty"` - MaxIdleSeconds uint `json:"max_idle_seconds"` + KeyspaceName string `json:"keyspace_name"` + MaxIdleSeconds uint `json:"max_idle_seconds"` // Metadata is keyspace metadata sent by the manager during activation, such as tenant, project, and cluster identifiers. Metadata map[string]string `json:"metadata,omitempty"` @@ -183,13 +182,6 @@ func (c *LoadKeyspaceController) ActivationMetadata() map[string]string { return metadata } -// ActivationKeyspaceID returns the keyspace ID carried by the activate request. -func (c *LoadKeyspaceController) ActivationKeyspaceID() uint32 { - mu.RLock() - defer mu.RUnlock() - return *activateRequest.KeyspaceID -} - // Handler returns a handler to query tidb pool status or activate or exit the tidb server. func (c *LoadKeyspaceController) Handler(svr *server.Server) (string, *http.ServeMux) { mux := http.NewServeMux() @@ -200,7 +192,7 @@ func (c *LoadKeyspaceController) Handler(svr *server.Server) (string, *http.Serv w.WriteHeader(http.StatusBadRequest) return } - if req.KeyspaceName == "" || req.KeyspaceID == nil { + if req.KeyspaceName == "" { w.WriteHeader(http.StatusBadRequest) return } @@ -219,7 +211,7 @@ func (c *LoadKeyspaceController) Handler(svr *server.Server) (string, *http.Serv logutil.BgLogger().Error("failed to write response", zap.Error(err)) } return - case activateRequest.KeyspaceName != req.KeyspaceName || *activateRequest.KeyspaceID != *req.KeyspaceID: + case activateRequest.KeyspaceName != req.KeyspaceName: mu.Unlock() w.WriteHeader(http.StatusPreconditionFailed) _, err := w.Write([]byte("server is not in standby mode")) @@ -375,7 +367,6 @@ func (c *LoadKeyspaceController) WaitForActivate() { logutil.BgLogger().Info("standby receive activate request", zap.String("keyspace-name", activateRequest.KeyspaceName), - zap.Uint32p("keyspace-id", activateRequest.KeyspaceID), zap.Uint("max-idle-seconds", activateRequest.MaxIdleSeconds), zap.Bool("run-auto-analyze", activateRequest.RunAutoAnalyze), zap.Bool("tidb-enable-ddl", activateRequest.TiDBEnableDDL), diff --git a/pkg/standby/standby_test.go b/pkg/standby/standby_test.go index 33b6d002f4544..d5413d96e3d3f 100644 --- a/pkg/standby/standby_test.go +++ b/pkg/standby/standby_test.go @@ -16,7 +16,6 @@ package standby import ( "encoding/json" - "errors" "net/http" "net/http/httptest" "strings" @@ -29,22 +28,14 @@ func TestActivateRequestMetadata(t *testing.T) { var req ActivateRequest require.NoError(t, json.Unmarshal([]byte(`{ "keyspace_name": "ks", - "keyspace_id": 42, "metadata": { "meta_a": "value_a" } }`), &req)) - require.NotNil(t, req.KeyspaceID) - require.Equal(t, uint32(42), *req.KeyspaceID) require.Equal(t, map[string]string{ "meta_a": "value_a", }, req.Metadata) - var zeroKeyspaceIDReq ActivateRequest - require.NoError(t, json.Unmarshal([]byte(`{"keyspace_name":"ks","keyspace_id":0}`), &zeroKeyspaceIDReq)) - require.NotNil(t, zeroKeyspaceIDReq.KeyspaceID) - require.Equal(t, uint32(0), *zeroKeyspaceIDReq.KeyspaceID) - mu.Lock() originalRequest := activateRequest activateRequest = req @@ -60,55 +51,15 @@ func TestActivateRequestMetadata(t *testing.T) { require.Equal(t, req.Metadata, metadata) metadata["meta_a"] = "changed" require.Equal(t, "value_a", controller.ActivationMetadata()["meta_a"]) - - keyspaceID := controller.ActivationKeyspaceID() - require.Equal(t, uint32(42), keyspaceID) - require.Equal(t, uint32(42), controller.ActivationKeyspaceID()) } -func TestActivateRequiresKeyspaceID(t *testing.T) { +func TestActivateRequiresKeyspaceName(t *testing.T) { controller := NewLoadKeyspaceController() _, mux := controller.Handler(nil) - req := httptest.NewRequest(http.MethodPost, "/tidb-pool/activate", strings.NewReader(`{"keyspace_name":"ks"}`)) + req := httptest.NewRequest(http.MethodPost, "/tidb-pool/activate", strings.NewReader(`{}`)) resp := httptest.NewRecorder() mux.ServeHTTP(resp, req) require.Equal(t, http.StatusBadRequest, resp.Code) } - -func TestActivateRejectsMismatchedKeyspaceID(t *testing.T) { - mu.Lock() - originalState, originalRequest := state, activateRequest - state = standbyState - activateRequest = ActivateRequest{} - mu.Unlock() - t.Cleanup(func() { - mu.Lock() - state = originalState - activateRequest = originalRequest - mu.Unlock() - }) - - controller := NewLoadKeyspaceController() - _, mux := controller.Handler(nil) - firstRespCode := make(chan int, 1) - go func() { - req := httptest.NewRequest(http.MethodPost, "/tidb-pool/activate", strings.NewReader(`{"keyspace_name":"ks","keyspace_id":42}`)) - resp := httptest.NewRecorder() - mux.ServeHTTP(resp, req) - firstRespCode <- resp.Code - }() - t.Cleanup(func() { - controller.EndStandby(errors.New("test done")) - require.Equal(t, http.StatusInternalServerError, <-firstRespCode) - }) - - <-activateCh - req := httptest.NewRequest(http.MethodPost, "/tidb-pool/activate", strings.NewReader(`{"keyspace_name":"ks","keyspace_id":43}`)) - resp := httptest.NewRecorder() - - mux.ServeHTTP(resp, req) - - require.Equal(t, http.StatusPreconditionFailed, resp.Code) -} From 9a233c0cc169095077929968738f6ef323cfd39d Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 26 May 2026 21:34:24 +0800 Subject: [PATCH 18/18] config: precompute slow log observability fields --- cmd/tidb-server/main_test.go | 4 +++- pkg/config/config_test.go | 6 +++++- pkg/config/keyspace_observability.go | 28 ++++++++++++++++++++-------- pkg/sessionctx/variable/slow_log.go | 8 ++------ 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index c761026e62eb7..4a088541c2ea4 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -195,7 +195,9 @@ func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { cfg := config.GetGlobalConfig() require.Equal(t, map[string]string{"keyspace_name": "ks", "keyspace_meta_label_a": "value_a"}, cfg.GetKeyspaceObservabilityMetricLabels()) - require.Equal(t, map[string]string{"keyspace_meta_slow_a": "value_a"}, cfg.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, []config.KeyspaceObservabilityLogField{ + {Name: "keyspace_meta_slow_a", Value: "value_a"}, + }, cfg.GetKeyspaceObservabilitySlowLogFields()) require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, cfg.GetKeyspaceObservabilityStmtLogFields()) } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 86d1561eae873..319348b4a6abd 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -183,6 +183,7 @@ required = true [[keyspace-observability.fields]] source = "meta_b" metric-label = "keyspace_meta_label_b" +slow-log-field = "keyspace_meta_slow_b" ` _, err := toml.Decode(content, conf) require.NoError(t, err) @@ -192,7 +193,10 @@ metric-label = "keyspace_meta_label_b" "meta_b": "value_b", })) require.Equal(t, map[string]string{"keyspace_meta_label_a": "value_a", "keyspace_meta_label_b": "value_b"}, conf.GetKeyspaceObservabilityMetricLabels()) - require.Equal(t, map[string]string{"keyspace_meta_slow_a": "value_a"}, conf.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, []KeyspaceObservabilityLogField{ + {Name: "keyspace_meta_slow_a", Value: "value_a"}, + {Name: "keyspace_meta_slow_b", Value: "value_b"}, + }, conf.GetKeyspaceObservabilitySlowLogFields()) require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, conf.GetKeyspaceObservabilityStmtLogFields()) require.ErrorContains(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_b": "value_b"}), `missing required keyspace metadata entry "meta_a"`) diff --git a/pkg/config/keyspace_observability.go b/pkg/config/keyspace_observability.go index 69531f00836ef..c53160654e13e 100644 --- a/pkg/config/keyspace_observability.go +++ b/pkg/config/keyspace_observability.go @@ -17,6 +17,7 @@ package config import ( "fmt" "maps" + "sort" "strings" "github.com/prometheus/common/model" @@ -38,9 +39,15 @@ type KeyspaceObservabilityField struct { // KeyspaceObservabilityValues stores resolved metadata values. type KeyspaceObservabilityValues struct { - MetricLabels map[string]string `toml:"-" json:"-"` - SlowLogFields map[string]string `toml:"-" json:"-"` - StmtLogFields map[string]string `toml:"-" json:"-"` + MetricLabels map[string]string `toml:"-" json:"-"` + SlowLogFields []KeyspaceObservabilityLogField `toml:"-" json:"-"` + StmtLogFields map[string]string `toml:"-" json:"-"` +} + +// KeyspaceObservabilityLogField stores a resolved log field value. +type KeyspaceObservabilityLogField struct { + Name string + Value string } const keyspaceObservabilityMetricLabelPrefix = "keyspace_meta_" @@ -106,7 +113,6 @@ func validPrometheusLabelName(label string) bool { func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { resolved := KeyspaceObservabilityValues{ MetricLabels: make(map[string]string), - SlowLogFields: make(map[string]string), StmtLogFields: make(map[string]string), } for _, field := range c.KeyspaceObservability.Fields { @@ -121,12 +127,18 @@ func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { resolved.MetricLabels[field.MetricLabel] = value } if field.SlowLogField != "" { - resolved.SlowLogFields[field.SlowLogField] = value + resolved.SlowLogFields = append(resolved.SlowLogFields, KeyspaceObservabilityLogField{ + Name: field.SlowLogField, + Value: value, + }) } if field.StmtLogField != "" { resolved.StmtLogFields[field.StmtLogField] = value } } + sort.SliceStable(resolved.SlowLogFields, func(i, j int) bool { + return resolved.SlowLogFields[i].Name < resolved.SlowLogFields[j].Name + }) c.KeyspaceObservabilityValues = resolved return nil } @@ -138,7 +150,7 @@ func (v KeyspaceObservabilityValues) Clone() KeyspaceObservabilityValues { res.MetricLabels = maps.Clone(v.MetricLabels) } if len(v.SlowLogFields) > 0 { - res.SlowLogFields = maps.Clone(v.SlowLogFields) + res.SlowLogFields = append([]KeyspaceObservabilityLogField(nil), v.SlowLogFields...) } if len(v.StmtLogFields) > 0 { res.StmtLogFields = maps.Clone(v.StmtLogFields) @@ -151,8 +163,8 @@ func (c *Config) GetKeyspaceObservabilityMetricLabels() map[string]string { return c.KeyspaceObservabilityValues.MetricLabels } -// GetKeyspaceObservabilitySlowLogFields returns resolved slow log fields. -func (c *Config) GetKeyspaceObservabilitySlowLogFields() map[string]string { +// GetKeyspaceObservabilitySlowLogFields returns resolved slow log fields in stable order. +func (c *Config) GetKeyspaceObservabilitySlowLogFields() []KeyspaceObservabilityLogField { return c.KeyspaceObservabilityValues.SlowLogFields } diff --git a/pkg/sessionctx/variable/slow_log.go b/pkg/sessionctx/variable/slow_log.go index 873a11b27506f..e60bf9fb5bf14 100644 --- a/pkg/sessionctx/variable/slow_log.go +++ b/pkg/sessionctx/variable/slow_log.go @@ -20,7 +20,6 @@ import ( "encoding/json" "fmt" "hash/crc64" - "maps" "math" "regexp" "slices" @@ -591,11 +590,8 @@ func (s *SessionVars) SlowLogFormat(logItems *SlowQueryLogItems) string { if logItems.PrevStmt != "" { writeSlowLogItem(&buf, SlowLogPrevStmt, logItems.PrevStmt) } - keyspaceFields := config.GetGlobalConfig().GetKeyspaceObservabilitySlowLogFields() - keyspaceFieldKeys := slices.Collect(maps.Keys(keyspaceFields)) - slices.Sort(keyspaceFieldKeys) - for _, key := range keyspaceFieldKeys { - writeSlowLogItem(&buf, key, keyspaceFields[key]) + for _, field := range config.GetGlobalConfig().GetKeyspaceObservabilitySlowLogFields() { + writeSlowLogItem(&buf, field.Name, field.Value) } if s.CurrentDBChanged {