diff --git a/cmd/tidb-server/BUILD.bazel b/cmd/tidb-server/BUILD.bazel index 4ff8cdac2f33f..b4e78ac7e11c5 100644 --- a/cmd/tidb-server/BUILD.bazel +++ b/cmd/tidb-server/BUILD.bazel @@ -107,7 +107,7 @@ go_test( srcs = ["main_test.go"], embed = [":tidb-server_lib"], flaky = True, - shard_count = 6, + shard_count = 8, deps = [ "//pkg/config", "//pkg/config/deploymode", diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index 19593cc379599..43a886d31e67e 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -19,6 +19,7 @@ import ( "flag" "fmt" "io/fs" + "maps" "os" "runtime" "strconv" @@ -314,6 +315,7 @@ func main() { } var standbyController server.StandbyController + var activationMetadata map[string]string if config.GetGlobalConfig().Standby.StandByMode { standbyController = standby.NewLoadKeyspaceController() } @@ -330,11 +332,18 @@ func main() { defer standbyController.EndStandby(err) // need to validate config again in case of config change via standby terror.MustNil(config.GetGlobalConfig().Valid()) + if c, ok := standbyController.(*standby.LoadKeyspaceController); ok { + activationMetadata = c.ActivationMetadata() + } } signal.SetupUSR1Handler() err = registerStores() terror.MustNil(err) + if deploymode.IsStarter() { + err = prepareKeyspaceObservabilityForStarter(activationMetadata) + terror.MustNil(err) + } err = metricsutil.RegisterMetrics() terror.MustNil(err) @@ -1146,6 +1155,39 @@ func closeStmtSummary() { } } +const ( + keyspaceNameMetricLabel = "keyspace_name" +) + +func prepareKeyspaceObservabilityForStarter(metadata map[string]string) error { + cfg := config.GetGlobalConfig() + + if cfg.Store != config.StoreTypeTiKV { + return nil + } + + resolvedValues := config.KeyspaceObservabilityValues{ + MetricLabels: map[string]string{ + keyspaceNameMetricLabel: cfg.KeyspaceName, + }, + } + + copiedConfig := *config.GetGlobalConfig() + if err := copiedConfig.ResolveKeyspaceObservability(metadata); err != nil { + return err + } + configuredValues := copiedConfig.KeyspaceObservabilityValues.Clone() + maps.Copy(resolvedValues.MetricLabels, configuredValues.MetricLabels) + resolvedValues.SlowLogFields = configuredValues.SlowLogFields + resolvedValues.StmtLogFields = configuredValues.StmtLogFields + + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservabilityValues = resolvedValues + }) + + return nil +} + func enablePyroscope() { if os.Getenv("PYROSCOPE_SERVER_ADDRESS") != "" { runtime.SetMutexProfileFraction(5) diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index 06aeafea45487..4a088541c2ea4 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -154,3 +154,71 @@ func TestSetVersionByConfigNormalizeLegacyPlaceholderForNextGen(t *testing.T) { require.Equal(t, "v26.3.0", mysql.TiDBReleaseVersion) require.Equal(t, "8.0.11-TiDB-CLOUD.202603.0", mysql.ServerVersion) } + +func TestSetupKeyspaceObservabilityForStarter(t *testing.T) { + if kerneltype.IsClassic() { + t.Skip("only for nextgen kernel") + } + restore := config.RestoreFunc() + defer restore() + + originalMode := deploymode.Get() + t.Cleanup(func() { + require.NoError(t, deploymode.Set(originalMode)) + }) + require.NoError(t, deploymode.Set(deploymode.Starter)) + + config.UpdateGlobal(func(conf *config.Config) { + conf.Store = config.StoreTypeTiKV + conf.KeyspaceName = "ks" + }) + err := prepareKeyspaceObservabilityForStarter(nil) + require.NoError(t, err) + require.Equal(t, map[string]string{"keyspace_name": "ks"}, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) + + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservability = config.KeyspaceObservability{ + Fields: []config.KeyspaceObservabilityField{{ + Source: "meta_a", + MetricLabel: "keyspace_meta_label_a", + SlowLogField: "keyspace_meta_slow_a", + StmtLogField: "stmt_meta_a", + Required: true, + }}, + } + }) + + err = prepareKeyspaceObservabilityForStarter(map[string]string{ + "meta_a": "value_a", + }) + require.NoError(t, err) + + cfg := config.GetGlobalConfig() + require.Equal(t, map[string]string{"keyspace_name": "ks", "keyspace_meta_label_a": "value_a"}, cfg.GetKeyspaceObservabilityMetricLabels()) + require.Equal(t, []config.KeyspaceObservabilityLogField{ + {Name: "keyspace_meta_slow_a", Value: "value_a"}, + }, cfg.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, cfg.GetKeyspaceObservabilityStmtLogFields()) +} + +func TestSetupKeyspaceObservabilityForStarterSkipsNonTiKV(t *testing.T) { + if kerneltype.IsClassic() { + t.Skip("only for nextgen kernel") + } + restore := config.RestoreFunc() + defer restore() + originalMode := deploymode.Get() + t.Cleanup(func() { + require.NoError(t, deploymode.Set(originalMode)) + }) + require.NoError(t, deploymode.Set(deploymode.Starter)) + + config.UpdateGlobal(func(conf *config.Config) { + conf.Store = config.StoreTypeUniStore + conf.Path = "invalid-pd-path" + conf.KeyspaceName = "test_keyspace" + }) + + require.NoError(t, prepareKeyspaceObservabilityForStarter(nil)) + require.Empty(t, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) +} diff --git a/pkg/config/BUILD.bazel b/pkg/config/BUILD.bazel index 57b2c3b89a90c..3787f75caebfb 100644 --- a/pkg/config/BUILD.bazel +++ b/pkg/config/BUILD.bazel @@ -6,6 +6,7 @@ go_library( "config.go", "config_util.go", "const.go", + "keyspace_observability.go", "store.go", "tiflash.go", ], @@ -23,6 +24,7 @@ go_library( "@com_github_burntsushi_toml//:toml", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_log//:log", + "@com_github_prometheus_common//model", "@com_github_tikv_client_go_v2//config", "@com_github_uber_jaeger_client_go//config", "@org_uber_go_atomic//:atomic", @@ -42,7 +44,7 @@ go_test( data = glob(["**"]), embed = [":config"], flaky = True, - shard_count = 32, + shard_count = 34, deps = [ "//pkg/config/deploymode", "//pkg/config/kerneltype", diff --git a/pkg/config/config.go b/pkg/config/config.go index a069b87e82ef2..f999841efc166 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -262,6 +262,9 @@ type Config struct { // key will be the default value of the session variable `txn_scope` for this tidb-server. Labels map[string]string `toml:"labels" json:"labels"` + KeyspaceObservability KeyspaceObservability `toml:"keyspace-observability" json:"keyspace-observability"` + KeyspaceObservabilityValues KeyspaceObservabilityValues `toml:"-" json:"-"` + // EnableGlobalIndex is deprecated. EnableGlobalIndex bool `toml:"enable-global-index" json:"enable-global-index"` @@ -1473,12 +1476,18 @@ func (c *Config) Valid() error { if !kerneltype.IsNextGen() && c.DeployMode != deploymode.Premium { return fmt.Errorf("deploy-mode can only be configured for nextgen TiDB") } + if len(c.KeyspaceObservability.Fields) > 0 && c.DeployMode != deploymode.Starter { + return fmt.Errorf("keyspace-observability.fields can only be configured when deploy-mode is starter") + } if c.DXFResourceLimit < MinDXFResourceLimit || c.DXFResourceLimit > MaxDXFResourceLimit { return fmt.Errorf("dxf-resource-limit should be between %d and %d", MinDXFResourceLimit, MaxDXFResourceLimit) } if c.DXFResourceLimit != DefDXFResourceLimit && c.DeployMode != deploymode.PremiumReserved { return fmt.Errorf("dxf-resource-limit can only be configured when deploy-mode is premium_reserved") } + if err := c.KeyspaceObservability.Valid(); err != nil { + return err + } if c.Store == StoreTypeMockTiKV && !c.Instance.TiDBEnableDDL.Load() { return fmt.Errorf("can't disable DDL on mocktikv") } diff --git a/pkg/config/config.toml.nextgen.example b/pkg/config/config.toml.nextgen.example index f8ad031a58d92..39a2ea85a39dd 100644 --- a/pkg/config/config.toml.nextgen.example +++ b/pkg/config/config.toml.nextgen.example @@ -446,6 +446,15 @@ allow-expression-index = false # engines means allow the tidb server read data from which types of engines. options: "tikv", "tiflash", "tidb". engines = ["tikv", "tiflash", "tidb"] +# Map selected keyspace metadata entries to observability outputs. +# Only valid when deploy-mode is starter. +# [[keyspace-observability.fields]] +# source = "meta_key" +# metric-label = "keyspace_meta_metric_label" +# slow-log-field = "keyspace_meta_slow_log_field" +# stmt-log-field = "stmt_log_field" +# required = false + # instance scope variables # These options are also available as a system variable for online configuration # changes to the system variable do not persist to the cluster. You must make changes diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 5628ff4a6242d..319348b4a6abd 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -170,6 +170,192 @@ disable-error-stack = false `, nbFalse, nbUnset, nbUnset, nbUnset, false, true) } +func TestKeyspaceObservability(t *testing.T) { + conf := NewConfig() + content := ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "keyspace_meta_label_a" +slow-log-field = "keyspace_meta_slow_a" +stmt-log-field = "stmt_meta_a" +required = true + +[[keyspace-observability.fields]] +source = "meta_b" +metric-label = "keyspace_meta_label_b" +slow-log-field = "keyspace_meta_slow_b" +` + _, err := toml.Decode(content, conf) + require.NoError(t, err) + require.NoError(t, conf.KeyspaceObservability.Valid()) + require.NoError(t, conf.ResolveKeyspaceObservability(map[string]string{ + "meta_a": "value_a", + "meta_b": "value_b", + })) + require.Equal(t, map[string]string{"keyspace_meta_label_a": "value_a", "keyspace_meta_label_b": "value_b"}, conf.GetKeyspaceObservabilityMetricLabels()) + require.Equal(t, []KeyspaceObservabilityLogField{ + {Name: "keyspace_meta_slow_a", Value: "value_a"}, + {Name: "keyspace_meta_slow_b", Value: "value_b"}, + }, conf.GetKeyspaceObservabilitySlowLogFields()) + require.Equal(t, map[string]string{"stmt_meta_a": "value_a"}, conf.GetKeyspaceObservabilityStmtLogFields()) + + require.ErrorContains(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_b": "value_b"}), `missing required keyspace metadata entry "meta_a"`) +} + +func TestKeyspaceObservabilityInvalid(t *testing.T) { + tests := []struct { + name string + content string + err string + }{ + { + name: "empty source", + content: ` +[[keyspace-observability.fields]] +source = "" +metric-label = "keyspace_meta_label_a" +`, + err: "source cannot be empty", + }, + { + name: "empty output", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +`, + err: "at least one output must be set", + }, + { + name: "invalid label", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "1_label" +`, + err: `invalid metric-label "1_label"`, + }, + { + name: "duplicate label", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "keyspace_meta_label_a" + +[[keyspace-observability.fields]] +source = "meta_b" +metric-label = "KEYSPACE_META_LABEL_A" +`, + err: `duplicated metric-label "KEYSPACE_META_LABEL_A"`, + }, + { + name: "reserved label without prefix", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "KEYSPACE_ID" +`, + err: `metric-label "KEYSPACE_ID" must start with "keyspace_meta_"`, + }, + { + name: "metric variable label without prefix", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "TYPE" +`, + err: `metric-label "TYPE" must start with "keyspace_meta_"`, + }, + { + name: "api label without prefix", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "api" +`, + err: `metric-label "api" must start with "keyspace_meta_"`, + }, + { + name: "service scope label without prefix", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "service_scope" +`, + err: `metric-label "service_scope" must start with "keyspace_meta_"`, + }, + { + name: "task id label without prefix", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "task_id" +`, + err: `metric-label "task_id" must start with "keyspace_meta_"`, + }, + { + name: "slow log field without prefix", + content: ` + [[keyspace-observability.fields]] + source = "meta_a" + slow-log-field = "Digest" + `, + err: `slow-log-field "Digest" must start with "keyspace_meta_"`, + }, + { + name: "invalid slow log field", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +slow-log-field = "Bad Field" +`, + err: `invalid slow-log-field "Bad Field"`, + }, + { + name: "duplicate slow log field", + content: ` + [[keyspace-observability.fields]] + source = "meta_a" + slow-log-field = "keyspace_meta_slow" + + [[keyspace-observability.fields]] + source = "meta_b" + slow-log-field = "KEYSPACE_META_SLOW" + `, + err: `duplicated slow-log-field "KEYSPACE_META_SLOW"`, + }, + { + name: "duplicate stmt log field", + content: ` +[[keyspace-observability.fields]] +source = "meta_a" +stmt-log-field = "stmt_meta" + +[[keyspace-observability.fields]] +source = "meta_b" +stmt-log-field = "stmt_meta" +`, + err: `duplicated stmt-log-field "stmt_meta"`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + conf := NewConfig() + _, err := toml.Decode(tt.content, conf) + require.NoError(t, err) + require.ErrorContains(t, conf.KeyspaceObservability.Valid(), tt.err) + }) + } + + conf := NewConfig() + _, err := toml.Decode(` +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "keyspace_meta_label_a" +`, conf) + require.NoError(t, err) + require.ErrorContains(t, conf.Valid(), "keyspace-observability.fields can only be configured when deploy-mode is starter") +} + func TestRemovedVariableCheck(t *testing.T) { configTest := []struct { options string @@ -1112,6 +1298,17 @@ dxf-resource-limit = 101`), 0644)) require.Equal(t, deploymode.Starter, conf.DeployMode) require.NoError(t, conf.Valid()) + require.NoError(t, os.WriteFile(configFile, []byte(`deploy-mode = "starter" + +[[keyspace-observability.fields]] +source = "meta_a" +metric-label = "keyspace_meta_label_a" +`), 0644)) + conf = NewConfig() + require.NoError(t, conf.Load(configFile)) + require.Equal(t, deploymode.Starter, conf.DeployMode) + require.NoError(t, conf.Valid()) + require.NoError(t, os.WriteFile(configFile, []byte(`deploy-mode = "unknown"`), 0644)) conf = NewConfig() require.ErrorContains(t, conf.Load(configFile), `invalid deploy mode "unknown"`) diff --git a/pkg/config/keyspace_observability.go b/pkg/config/keyspace_observability.go new file mode 100644 index 0000000000000..c53160654e13e --- /dev/null +++ b/pkg/config/keyspace_observability.go @@ -0,0 +1,174 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "fmt" + "maps" + "sort" + "strings" + + "github.com/prometheus/common/model" +) + +// KeyspaceObservability maps metadata entries to observability outputs. +type KeyspaceObservability struct { + Fields []KeyspaceObservabilityField `toml:"fields" json:"fields,omitempty"` +} + +// KeyspaceObservabilityField describes one metadata entry mapping. +type KeyspaceObservabilityField struct { + Source string `toml:"source" json:"source,omitempty"` + MetricLabel string `toml:"metric-label" json:"metric-label,omitempty"` + SlowLogField string `toml:"slow-log-field" json:"slow-log-field,omitempty"` + StmtLogField string `toml:"stmt-log-field" json:"stmt-log-field,omitempty"` + Required bool `toml:"required" json:"required,omitempty"` +} + +// KeyspaceObservabilityValues stores resolved metadata values. +type KeyspaceObservabilityValues struct { + MetricLabels map[string]string `toml:"-" json:"-"` + SlowLogFields []KeyspaceObservabilityLogField `toml:"-" json:"-"` + StmtLogFields map[string]string `toml:"-" json:"-"` +} + +// KeyspaceObservabilityLogField stores a resolved log field value. +type KeyspaceObservabilityLogField struct { + Name string + Value string +} + +const keyspaceObservabilityMetricLabelPrefix = "keyspace_meta_" + +// Valid validates metadata observability mappings. +func (o KeyspaceObservability) Valid() error { + metricLabels := make(map[string]struct{}, len(o.Fields)) + slowLogFields := make(map[string]struct{}, len(o.Fields)) + stmtLogFields := make(map[string]struct{}, len(o.Fields)) + for i, field := range o.Fields { + if field.Source == "" { + return fmt.Errorf("[keyspace-observability.fields.%d] source cannot be empty", i) + } + if field.MetricLabel == "" && field.SlowLogField == "" && field.StmtLogField == "" { + return fmt.Errorf("[keyspace-observability.fields.%d] at least one output must be set", i) + } + if field.MetricLabel != "" { + if !validPrometheusLabelName(field.MetricLabel) { + return fmt.Errorf("[keyspace-observability.fields.%d] invalid metric-label %q", i, field.MetricLabel) + } + key := strings.ToLower(field.MetricLabel) + if !strings.HasPrefix(key, keyspaceObservabilityMetricLabelPrefix) { + return fmt.Errorf("[keyspace-observability.fields.%d] metric-label %q must start with %q", i, field.MetricLabel, keyspaceObservabilityMetricLabelPrefix) + } + if _, ok := metricLabels[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated metric-label %q", i, field.MetricLabel) + } + metricLabels[key] = struct{}{} + } + if field.SlowLogField != "" { + if !validKeyspaceObservabilityLogFieldName(field.SlowLogField) { + return fmt.Errorf("[keyspace-observability.fields.%d] invalid slow-log-field %q", i, field.SlowLogField) + } + key := strings.ToLower(field.SlowLogField) + if !strings.HasPrefix(key, keyspaceObservabilityMetricLabelPrefix) { + return fmt.Errorf("[keyspace-observability.fields.%d] slow-log-field %q must start with %q", i, field.SlowLogField, keyspaceObservabilityMetricLabelPrefix) + } + if _, ok := slowLogFields[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated slow-log-field %q", i, field.SlowLogField) + } + slowLogFields[key] = struct{}{} + } + if field.StmtLogField != "" { + key := strings.ToLower(field.StmtLogField) + if _, ok := stmtLogFields[key]; ok { + return fmt.Errorf("[keyspace-observability.fields.%d] duplicated stmt-log-field %q", i, field.StmtLogField) + } + stmtLogFields[key] = struct{}{} + } + } + return nil +} + +func validKeyspaceObservabilityLogFieldName(field string) bool { + return validPrometheusLabelName(field) +} + +func validPrometheusLabelName(label string) bool { + return model.LabelName(label).IsValid() && model.LabelName(label).IsValidLegacy() +} + +// ResolveKeyspaceObservability resolves configured output values from metadata. +func (c *Config) ResolveKeyspaceObservability(values map[string]string) error { + resolved := KeyspaceObservabilityValues{ + MetricLabels: make(map[string]string), + StmtLogFields: make(map[string]string), + } + for _, field := range c.KeyspaceObservability.Fields { + value, ok := values[field.Source] + if !ok { + if field.Required { + return fmt.Errorf("missing required keyspace metadata entry %q", field.Source) + } + continue + } + if field.MetricLabel != "" { + resolved.MetricLabels[field.MetricLabel] = value + } + if field.SlowLogField != "" { + resolved.SlowLogFields = append(resolved.SlowLogFields, KeyspaceObservabilityLogField{ + Name: field.SlowLogField, + Value: value, + }) + } + if field.StmtLogField != "" { + resolved.StmtLogFields[field.StmtLogField] = value + } + } + sort.SliceStable(resolved.SlowLogFields, func(i, j int) bool { + return resolved.SlowLogFields[i].Name < resolved.SlowLogFields[j].Name + }) + c.KeyspaceObservabilityValues = resolved + return nil +} + +// Clone returns a deep copy of resolved metadata observability values. +func (v KeyspaceObservabilityValues) Clone() KeyspaceObservabilityValues { + res := KeyspaceObservabilityValues{} + if len(v.MetricLabels) > 0 { + res.MetricLabels = maps.Clone(v.MetricLabels) + } + if len(v.SlowLogFields) > 0 { + res.SlowLogFields = append([]KeyspaceObservabilityLogField(nil), v.SlowLogFields...) + } + if len(v.StmtLogFields) > 0 { + res.StmtLogFields = maps.Clone(v.StmtLogFields) + } + return res +} + +// GetKeyspaceObservabilityMetricLabels returns resolved metric labels. +func (c *Config) GetKeyspaceObservabilityMetricLabels() map[string]string { + return c.KeyspaceObservabilityValues.MetricLabels +} + +// GetKeyspaceObservabilitySlowLogFields returns resolved slow log fields in stable order. +func (c *Config) GetKeyspaceObservabilitySlowLogFields() []KeyspaceObservabilityLogField { + return c.KeyspaceObservabilityValues.SlowLogFields +} + +// GetKeyspaceObservabilityStmtLogFields returns resolved statement log fields. +func (c *Config) GetKeyspaceObservabilityStmtLogFields() map[string]string { + return c.KeyspaceObservabilityValues.StmtLogFields +} diff --git a/pkg/sessionctx/variable/slow_log.go b/pkg/sessionctx/variable/slow_log.go index 7db2b4b2340b9..e60bf9fb5bf14 100644 --- a/pkg/sessionctx/variable/slow_log.go +++ b/pkg/sessionctx/variable/slow_log.go @@ -29,6 +29,7 @@ import ( "time" "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/terror" "github.com/pingcap/tidb/pkg/sessionctx/slowlogrule" @@ -589,6 +590,9 @@ func (s *SessionVars) SlowLogFormat(logItems *SlowQueryLogItems) string { if logItems.PrevStmt != "" { writeSlowLogItem(&buf, SlowLogPrevStmt, logItems.PrevStmt) } + for _, field := range config.GetGlobalConfig().GetKeyspaceObservabilitySlowLogFields() { + writeSlowLogItem(&buf, field.Name, field.Value) + } if s.CurrentDBChanged { fmt.Fprintf(&buf, "use %s;\n", strings.ToLower(s.CurrentDB)) diff --git a/pkg/sessionctx/variable/tests/session_test.go b/pkg/sessionctx/variable/tests/session_test.go index ffa0562724f8e..9266dcde900b0 100644 --- a/pkg/sessionctx/variable/tests/session_test.go +++ b/pkg/sessionctx/variable/tests/session_test.go @@ -385,6 +385,20 @@ func TestSlowLogFormat(t *testing.T) { // Restore for subsequent assertions. logItems.SessionConnectAttrs = nil + restore := config.RestoreFunc() + defer restore() + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservability = config.KeyspaceObservability{ + Fields: []config.KeyspaceObservabilityField{{ + Source: "meta_a", + SlowLogField: "keyspace_meta_slow_a", + }}, + } + require.NoError(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_a": "value_a"})) + }) + logString = seVar.SlowLogFormat(logItems) + require.Equal(t, resultFields+"\n"+"# keyspace_meta_slow_a: value_a\n"+sql, logString) + // test PrepareSlowLogItemsForRules and CompleteSlowLogItemsForRules seVar.SlowLogRules = slowlogrule.NewSessionSlowLogRules(&slowlogrule.SlowLogRules{ Fields: map[string]struct{}{ diff --git a/pkg/standby/BUILD.bazel b/pkg/standby/BUILD.bazel index dc917bccb6cb6..897be22874a26 100644 --- a/pkg/standby/BUILD.bazel +++ b/pkg/standby/BUILD.bazel @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "standby", @@ -18,3 +18,13 @@ go_library( "@org_uber_go_zap//:zap", ], ) + +go_test( + name = "standby_test", + timeout = "short", + srcs = ["standby_test.go"], + embed = [":standby"], + flaky = True, + shard_count = 3, + deps = ["@com_github_stretchr_testify//require"], +) diff --git a/pkg/standby/standby.go b/pkg/standby/standby.go index 267c80fa0b947..91f0ce4c7753a 100644 --- a/pkg/standby/standby.go +++ b/pkg/standby/standby.go @@ -50,6 +50,8 @@ const ( type ActivateRequest struct { KeyspaceName string `json:"keyspace_name"` MaxIdleSeconds uint `json:"max_idle_seconds"` + // Metadata is keyspace metadata sent by the manager during activation, such as tenant, project, and cluster identifiers. + Metadata map[string]string `json:"metadata,omitempty"` // analyze table RunAutoAnalyze bool `json:"run_auto_analyze"` @@ -166,6 +168,20 @@ func IsPreTidbNormalRestart(keyspaceName string) (bool, string) { return true, preTidbNormalRestartMsg } +// ActivationMetadata returns a copy of metadata carried by the activate request. +func (c *LoadKeyspaceController) ActivationMetadata() map[string]string { + mu.RLock() + defer mu.RUnlock() + if len(activateRequest.Metadata) == 0 { + return nil + } + metadata := make(map[string]string, len(activateRequest.Metadata)) + for k, v := range activateRequest.Metadata { + metadata[k] = v + } + return metadata +} + // Handler returns a handler to query tidb pool status or activate or exit the tidb server. func (c *LoadKeyspaceController) Handler(svr *server.Server) (string, *http.ServeMux) { mux := http.NewServeMux() @@ -349,7 +365,12 @@ func (c *LoadKeyspaceController) WaitForActivate() { <-activateCh - logutil.BgLogger().Info("standby receive activate request", zap.Any("activate-request", activateRequest)) + logutil.BgLogger().Info("standby receive activate request", + zap.String("keyspace-name", activateRequest.KeyspaceName), + zap.Uint("max-idle-seconds", activateRequest.MaxIdleSeconds), + zap.Bool("run-auto-analyze", activateRequest.RunAutoAnalyze), + zap.Bool("tidb-enable-ddl", activateRequest.TiDBEnableDDL), + zap.Int("metadata-count", len(activateRequest.Metadata))) config.UpdateGlobal(func(c *config.Config) { c.KeyspaceName = activateRequest.KeyspaceName diff --git a/pkg/standby/standby_test.go b/pkg/standby/standby_test.go new file mode 100644 index 0000000000000..d5413d96e3d3f --- /dev/null +++ b/pkg/standby/standby_test.go @@ -0,0 +1,65 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package standby + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestActivateRequestMetadata(t *testing.T) { + var req ActivateRequest + require.NoError(t, json.Unmarshal([]byte(`{ + "keyspace_name": "ks", + "metadata": { + "meta_a": "value_a" + } + }`), &req)) + require.Equal(t, map[string]string{ + "meta_a": "value_a", + }, req.Metadata) + + mu.Lock() + originalRequest := activateRequest + activateRequest = req + mu.Unlock() + t.Cleanup(func() { + mu.Lock() + activateRequest = originalRequest + mu.Unlock() + }) + + controller := NewLoadKeyspaceController() + metadata := controller.ActivationMetadata() + require.Equal(t, req.Metadata, metadata) + metadata["meta_a"] = "changed" + require.Equal(t, "value_a", controller.ActivationMetadata()["meta_a"]) +} + +func TestActivateRequiresKeyspaceName(t *testing.T) { + controller := NewLoadKeyspaceController() + _, mux := controller.Handler(nil) + req := httptest.NewRequest(http.MethodPost, "/tidb-pool/activate", strings.NewReader(`{}`)) + resp := httptest.NewRecorder() + + mux.ServeHTTP(resp, req) + + require.Equal(t, http.StatusBadRequest, resp.Code) +} diff --git a/pkg/store/driver/BUILD.bazel b/pkg/store/driver/BUILD.bazel index 13a574bf66695..d7dd84b06e416 100644 --- a/pkg/store/driver/BUILD.bazel +++ b/pkg/store/driver/BUILD.bazel @@ -8,6 +8,7 @@ go_library( deps = [ "//pkg/kv", "//pkg/metrics", + "//pkg/metrics/common", "//pkg/sessionctx/variable", "//pkg/store/copr", "//pkg/store/driver/error", @@ -49,6 +50,7 @@ go_test( shard_count = 8, deps = [ "//pkg/kv", + "//pkg/metrics/common", "//pkg/session", "//pkg/store/mockstore", "//pkg/testkit", @@ -64,6 +66,7 @@ go_test( "@com_github_tikv_client_go_v2//oracle", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_client_go_v2//tikvrpc", + "@com_github_tikv_pd_client//opt", "@org_uber_go_goleak//:goleak", ], ) diff --git a/pkg/store/driver/config_test.go b/pkg/store/driver/config_test.go index ab78573ee8a03..07fa0fdbc6633 100644 --- a/pkg/store/driver/config_test.go +++ b/pkg/store/driver/config_test.go @@ -17,8 +17,10 @@ package driver import ( "testing" + metricscommon "github.com/pingcap/tidb/pkg/metrics/common" "github.com/stretchr/testify/require" "github.com/tikv/client-go/v2/config" + "github.com/tikv/pd/client/opt" ) func TestSetDefaultAndOptions(t *testing.T) { @@ -34,4 +36,14 @@ func TestSetDefaultAndOptions(t *testing.T) { require.Equal(t, globalConfig.TxnLocalLatches, d.txnLocalLatches) require.Equal(t, globalConfig.PDClient, d.pdConfig) require.Equal(t, origSecurity, config.GetGlobalConfig().Security) + + metricscommon.SetConstLabels("keyspace_id", "42", "keyspace_name", "ks") + t.Cleanup(func() { + metricscommon.SetConstLabels() + }) + pdOpt := opt.NewOption() + for _, apply := range d.pdClientOptions() { + apply(pdOpt) + } + require.Equal(t, metricscommon.GetConstLabels(), pdOpt.MetricsLabels) } diff --git a/pkg/store/driver/tikv_driver.go b/pkg/store/driver/tikv_driver.go index 9ca2772fca115..d61fa2c2d2200 100644 --- a/pkg/store/driver/tikv_driver.go +++ b/pkg/store/driver/tikv_driver.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/metrics" + metricscommon "github.com/pingcap/tidb/pkg/metrics/common" "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/store/copr" derr "github.com/pingcap/tidb/pkg/store/driver/error" @@ -164,17 +165,7 @@ func (d *TiKVDriver) OpenWithOptions(path string, options ...Option) (resStore k CertPath: d.security.ClusterSSLCert, KeyPath: d.security.ClusterSSLKey, }, - opt.WithGRPCDialOptions( - // keep the same with etcd, see - // https://github.com/etcd-io/etcd/blob/5704c6148d798ea444db26a966394406d8c10526/server/etcdserver/api/v3rpc/grpc.go#L34 - grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), - grpc.WithKeepaliveParams(keepalive.ClientParameters{ - Time: time.Duration(d.tikvConfig.GrpcKeepAliveTime) * time.Second, - Timeout: time.Duration(d.tikvConfig.GrpcKeepAliveTimeout) * time.Second, - }), - ), - opt.WithCustomTimeoutOption(time.Duration(d.pdConfig.PDServerTimeout)*time.Second), - opt.WithForwardingOption(config.GetGlobalConfig().EnableForwarding)) + d.pdClientOptions()...) if err != nil { return nil, errors.Trace(err) } @@ -253,6 +244,26 @@ func (d *TiKVDriver) OpenWithOptions(path string, options ...Option) (resStore k return store, nil } +func (d *TiKVDriver) pdClientOptions() []opt.ClientOption { + opts := []opt.ClientOption{ + opt.WithGRPCDialOptions( + // keep the same with etcd, see + // https://github.com/etcd-io/etcd/blob/5704c6148d798ea444db26a966394406d8c10526/server/etcdserver/api/v3rpc/grpc.go#L34 + grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), + grpc.WithKeepaliveParams(keepalive.ClientParameters{ + Time: time.Duration(d.tikvConfig.GrpcKeepAliveTime) * time.Second, + Timeout: time.Duration(d.tikvConfig.GrpcKeepAliveTimeout) * time.Second, + }), + ), + opt.WithCustomTimeoutOption(time.Duration(d.pdConfig.PDServerTimeout) * time.Second), + opt.WithForwardingOption(config.GetGlobalConfig().EnableForwarding), + } + if labels := metricscommon.GetConstLabels(); len(labels) > 0 { + opts = append(opts, opt.WithMetricsLabels(labels)) + } + return opts +} + type tikvStore struct { *tikv.KVStore etcdAddrs []string diff --git a/pkg/util/metricsutil/BUILD.bazel b/pkg/util/metricsutil/BUILD.bazel index ac57d68fa038b..1640bb09e51e4 100644 --- a/pkg/util/metricsutil/BUILD.bazel +++ b/pkg/util/metricsutil/BUILD.bazel @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "metricsutil", @@ -29,9 +29,21 @@ go_library( "//pkg/util", "//pkg/util/topsql/reporter/metrics", "@com_github_pingcap_kvproto//pkg/keyspacepb", - "@com_github_tikv_client_go_v2//config", "@com_github_tikv_pd_client//:client", "@com_github_tikv_pd_client//opt", "@com_github_tikv_pd_client//pkg/caller", ], ) + +go_test( + name = "metricsutil_test", + timeout = "short", + srcs = ["common_test.go"], + embed = [":metricsutil"], + flaky = True, + deps = [ + "//pkg/config", + "//pkg/metrics/common", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/util/metricsutil/common.go b/pkg/util/metricsutil/common.go index 42e38e31cfff3..348f9ede2be66 100644 --- a/pkg/util/metricsutil/common.go +++ b/pkg/util/metricsutil/common.go @@ -17,6 +17,7 @@ package metricsutil import ( "context" "fmt" + "maps" "time" "github.com/pingcap/kvproto/pkg/keyspacepb" @@ -42,7 +43,6 @@ import ( ttlmetrics "github.com/pingcap/tidb/pkg/ttl/metrics" "github.com/pingcap/tidb/pkg/util" topsqlreporter_metrics "github.com/pingcap/tidb/pkg/util/topsql/reporter/metrics" - tikvconfig "github.com/tikv/client-go/v2/config" pd "github.com/tikv/pd/client" "github.com/tikv/pd/client/opt" "github.com/tikv/pd/client/pkg/caller" @@ -50,52 +50,21 @@ import ( var componentName = caller.Component("tidb-metrics-util") -// RegisterMetrics register metrics with const label 'keyspace_id' if keyspaceName set. +const keyspaceIDLabel = "keyspace_id" + +// RegisterMetrics registers metrics with keyspace metadata labels when available. func RegisterMetrics() error { cfg := config.GetGlobalConfig() - if keyspace.IsKeyspaceNameEmpty(cfg.KeyspaceName) || cfg.Store != config.StoreTypeTiKV { - registerMetrics(nil) // register metrics without label 'keyspace_id'. - return nil - } - if kerneltype.IsNextGen() { metricscommon.SetConstLabels("keyspace_name", cfg.KeyspaceName) } - - pdAddrs, _, _, err := tikvconfig.ParsePath("tikv://" + cfg.Path) - if err != nil { - return err - } - - timeoutSec := time.Duration(cfg.PDClient.PDServerTimeout) * time.Second - // Note: for NextGen, we need to use the side effect of `NewClient` to init the metrics' builtin const labels - pdCli, err := pd.NewClient(componentName, pdAddrs, pd.SecurityOption{ - CAPath: cfg.Security.ClusterSSLCA, - CertPath: cfg.Security.ClusterSSLCert, - KeyPath: cfg.Security.ClusterSSLKey, - }, opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels())) - if err != nil { - return err - } - defer pdCli.Close() - - if kerneltype.IsNextGen() { - registerMetrics(nil) // metrics' const label already set - } else { - keyspaceMeta, err := getKeyspaceMeta(pdCli, cfg.KeyspaceName) - if err != nil { - return err - } - registerMetrics(keyspaceMeta) - } - return nil + return registerMetrics() } -// RegisterMetricsForBR register metrics with const label keyspace_id for BR. +// RegisterMetricsForBR registers metrics with keyspace metadata labels for BR. func RegisterMetricsForBR(pdAddrs []string, tls task.TLSConfig, keyspaceName string) error { if keyspace.IsKeyspaceNameEmpty(keyspaceName) { - registerMetrics(nil) // register metrics without label 'keyspace_id'. - return nil + return registerMetrics() } if kerneltype.IsNextGen() { @@ -107,24 +76,19 @@ func RegisterMetricsForBR(pdAddrs []string, tls task.TLSConfig, keyspaceName str if tls.IsEnabled() { securityOpt = tls.ToPDSecurityOption() } - // Note: for NextGen, pdCli is created to init the metrics' const labels pdCli, err := pd.NewClient(componentName, pdAddrs, securityOpt, - opt.WithCustomTimeoutOption(timeoutSec), opt.WithMetricsLabels(metricscommon.GetConstLabels())) + opt.WithCustomTimeoutOption(timeoutSec), opt.WithInitMetricsOption(false)) if err != nil { return err } defer pdCli.Close() - if kerneltype.IsNextGen() { - registerMetrics(nil) // metrics' const label already set - } else { - keyspaceMeta, err := getKeyspaceMeta(pdCli, keyspaceName) - if err != nil { - return err - } - registerMetrics(keyspaceMeta) + keyspaceMeta, err := getKeyspaceMeta(pdCli, keyspaceName) + if err != nil { + return err } - return nil + setKeyspaceIDConstLabel(keyspaceMeta.GetId()) + return registerMetrics() } func initMetrics() { @@ -150,11 +114,36 @@ func initMetrics() { } } -func registerMetrics(keyspaceMeta *keyspacepb.KeyspaceMeta) { - if keyspaceMeta != nil { - metricscommon.SetConstLabels("keyspace_id", fmt.Sprint(keyspaceMeta.GetId())) +func registerMetrics() error { + labels := cloneConstLabels() + maps.Copy(labels, config.GetGlobalConfig().GetKeyspaceObservabilityMetricLabels()) + if len(labels) > 0 { + setConstLabels(labels) } initMetrics() + return nil +} + +func cloneConstLabels() map[string]string { + labels := maps.Clone(metricscommon.GetConstLabels()) + if labels == nil { + labels = make(map[string]string) + } + return labels +} + +func setKeyspaceIDConstLabel(keyspaceID uint32) { + labels := cloneConstLabels() + labels[keyspaceIDLabel] = fmt.Sprint(keyspaceID) + setConstLabels(labels) +} + +func setConstLabels(labels map[string]string) { + kv := make([]string, 0, len(labels)*2) + for k, v := range labels { + kv = append(kv, k, v) + } + metricscommon.SetConstLabels(kv...) } func getKeyspaceMeta(pdCli pd.Client, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) { diff --git a/pkg/util/metricsutil/common_test.go b/pkg/util/metricsutil/common_test.go new file mode 100644 index 0000000000000..a25ae826eeeb3 --- /dev/null +++ b/pkg/util/metricsutil/common_test.go @@ -0,0 +1,53 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metricsutil + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/config" + metricscommon "github.com/pingcap/tidb/pkg/metrics/common" + "github.com/stretchr/testify/require" +) + +func TestRegisterMetricsWithKeyspaceObservabilityValues(t *testing.T) { + restore := config.RestoreFunc() + defer restore() + t.Cleanup(func() { + metricscommon.SetConstLabels() + }) + + labels := cloneConstLabels() + labels["label_a"] = "value_a" + require.Equal(t, "value_a", labels["label_a"]) + + metricscommon.SetConstLabels("base_label", "base_value") + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservabilityValues = config.KeyspaceObservabilityValues{ + MetricLabels: map[string]string{"label_a": "value_a"}, + } + }) + + require.NoError(t, registerMetrics()) + labels = metricscommon.GetConstLabels() + require.Equal(t, "base_value", labels["base_label"]) + require.Equal(t, "value_a", labels["label_a"]) + + metricscommon.SetConstLabels("keyspace_name", "ks") + setKeyspaceIDConstLabel(42) + labels = metricscommon.GetConstLabels() + require.Equal(t, "ks", labels["keyspace_name"]) + require.Equal(t, "42", labels["keyspace_id"]) +} diff --git a/pkg/util/stmtsummary/v2/BUILD.bazel b/pkg/util/stmtsummary/v2/BUILD.bazel index 1abfaf8d31663..db4ef2262fcd0 100644 --- a/pkg/util/stmtsummary/v2/BUILD.bazel +++ b/pkg/util/stmtsummary/v2/BUILD.bazel @@ -51,6 +51,7 @@ go_test( flaky = True, shard_count = 15, deps = [ + "//pkg/config", "//pkg/meta/model", "//pkg/metrics", "//pkg/parser/ast", diff --git a/pkg/util/stmtsummary/v2/logger.go b/pkg/util/stmtsummary/v2/logger.go index 64c3499c6ba28..629828d8e0307 100644 --- a/pkg/util/stmtsummary/v2/logger.go +++ b/pkg/util/stmtsummary/v2/logger.go @@ -20,6 +20,7 @@ import ( "time" "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/util/logutil" "go.uber.org/zap" "go.uber.org/zap/buffer" @@ -71,7 +72,7 @@ func (s *stmtLogStorage) sync() error { } func (s *stmtLogStorage) log(r *StmtRecord) { - b, err := json.Marshal(r) + b, err := marshalStmtRecord(r) if err != nil { logutil.BgLogger().Warn("failed to marshal statement summary", zap.Error(err)) return @@ -79,6 +80,22 @@ func (s *stmtLogStorage) log(r *StmtRecord) { s.logger.Info(string(b)) } +func marshalStmtRecord(r *StmtRecord) ([]byte, error) { + fields := config.GetGlobalConfig().GetKeyspaceObservabilityStmtLogFields() + if len(fields) == 0 { + return json.Marshal(r) + } + return json.Marshal(stmtRecordWithAdditionalFields{ + StmtRecord: r, + AdditionalFields: fields, + }) +} + +type stmtRecordWithAdditionalFields struct { + *StmtRecord + AdditionalFields map[string]string `json:"additional_fields"` +} + type stmtLogEncoder struct{} func (*stmtLogEncoder) EncodeEntry(entry zapcore.Entry, _ []zapcore.Field) (*buffer.Buffer, error) { diff --git a/pkg/util/stmtsummary/v2/record_test.go b/pkg/util/stmtsummary/v2/record_test.go index fbb65feb5468c..8fcfc88f179d2 100644 --- a/pkg/util/stmtsummary/v2/record_test.go +++ b/pkg/util/stmtsummary/v2/record_test.go @@ -15,8 +15,10 @@ package stmtsummary import ( + "encoding/json" "testing" + "github.com/pingcap/tidb/pkg/config" "github.com/stretchr/testify/require" ) @@ -83,4 +85,22 @@ func TestStmtRecord(t *testing.T) { require.Equal(t, info.TotalRUV2*2, record2.SumRUV2) require.Equal(t, info.CPUUsages.TidbCPUTime*2, record2.SumTidbCPU) require.Equal(t, info.CPUUsages.TikvCPUTime*2, record2.SumTikvCPU) + + restore := config.RestoreFunc() + defer restore() + config.UpdateGlobal(func(conf *config.Config) { + conf.KeyspaceObservability = config.KeyspaceObservability{ + Fields: []config.KeyspaceObservabilityField{{ + Source: "meta_a", + StmtLogField: "stmt_meta_a", + }}, + } + require.NoError(t, conf.ResolveKeyspaceObservability(map[string]string{"meta_a": "value_a"})) + }) + b, err := marshalStmtRecord(record2) + require.NoError(t, err) + items := make(map[string]any) + require.NoError(t, json.Unmarshal(b, &items)) + require.Equal(t, map[string]any{"stmt_meta_a": "value_a"}, items["additional_fields"]) + require.Equal(t, record2.Digest, items["digest"]) }