From da93b6f58bc50a3e6f34787dc284311f78b07707 Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 10 Feb 2026 21:06:27 -0800 Subject: [PATCH 01/24] planner: correlate subquery rule (wip) --- pkg/planner/core/casetest/rule/main_test.go | 5 + .../core/casetest/rule/rule_correlate_test.go | 54 ++++++ .../rule/testdata/correlate_suite_in.json | 9 + .../rule/testdata/correlate_suite_out.json | 37 +++++ .../rule/testdata/correlate_suite_xut.json | 37 +++++ pkg/planner/core/optimizer.go | 4 + pkg/planner/core/rule/logical_rules.go | 1 + pkg/planner/core/rule_correlate.go | 157 ++++++++++++++++++ pkg/sessionctx/vardef/tidb_vars.go | 5 + pkg/sessionctx/variable/session.go | 4 + pkg/sessionctx/variable/sysvar.go | 4 + 11 files changed, 317 insertions(+) create mode 100644 pkg/planner/core/casetest/rule/rule_correlate_test.go create mode 100644 pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json create mode 100644 pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json create mode 100644 pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json create mode 100644 pkg/planner/core/rule_correlate.go diff --git a/pkg/planner/core/casetest/rule/main_test.go b/pkg/planner/core/casetest/rule/main_test.go index 29dc38057e3b8..68ecea40c536e 100644 --- a/pkg/planner/core/casetest/rule/main_test.go +++ b/pkg/planner/core/casetest/rule/main_test.go @@ -35,6 +35,7 @@ func TestMain(m *testing.M) { testDataMap.LoadTestSuiteData("testdata", "predicate_pushdown_suite", true) testDataMap.LoadTestSuiteData("testdata", "predicate_simplification", true) testDataMap.LoadTestSuiteData("testdata", "outer_to_semi_join_suite", true) + testDataMap.LoadTestSuiteData("testdata", "correlate_suite", true) opts := []goleak.Option{ goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"), @@ -77,3 +78,7 @@ func GetPredicateSimplificationSuiteData() testdata.TestData { func GetOuterToSemiJoinSuiteData() testdata.TestData { return testDataMap["outer_to_semi_join_suite"] 
} + +func GetCorrelateSuiteData() testdata.TestData { + return testDataMap["correlate_suite"] +} diff --git a/pkg/planner/core/casetest/rule/rule_correlate_test.go b/pkg/planner/core/casetest/rule/rule_correlate_test.go new file mode 100644 index 0000000000000..81c5c670eb494 --- /dev/null +++ b/pkg/planner/core/casetest/rule/rule_correlate_test.go @@ -0,0 +1,54 @@ +// Copyright 2025 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package rule + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/testkit" + "github.com/pingcap/tidb/pkg/testkit/testdata" +) + +func TestCorrelate(tt *testing.T) { + testkit.RunTestUnderCascades(tt, func(t *testing.T, tk *testkit.TestKit, cascades, caller string) { + tk.MustExec("use test") + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1 (a int, b int, key(a))") + tk.MustExec("create table t2 (a int, b int, key(a))") + tk.MustExec("insert into t1 values (1,1),(2,2),(3,3)") + tk.MustExec("insert into t2 values (1,10),(2,20)") + + // Enable the correlate rule. 
+ tk.MustExec("set tidb_opt_enable_correlate_subquery = ON") + + var input []string + var output []struct { + SQL string + Plan []string + Result []string + } + suite := GetCorrelateSuiteData() + suite.LoadTestCases(t, &input, &output, cascades, caller) + for i, sql := range input { + testdata.OnRecord(func() { + output[i].SQL = sql + output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + sql).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows()) + }) + tk.MustQuery("explain format = 'brief' " + sql).Check(testkit.Rows(output[i].Plan...)) + tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...)) + } + }) +} diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json new file mode 100644 index 0000000000000..ebb140c6d8870 --- /dev/null +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json @@ -0,0 +1,9 @@ +[ + { + "name": "TestCorrelate", + "cases": [ + "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", + "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)" + ] + } +] diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json new file mode 100644 index 0000000000000..9cc59e02bb954 --- /dev/null +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -0,0 +1,37 @@ +[ + { + "Name": "TestCorrelate", + "Cases": [ + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", + "Plan": [ + "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Selection(Probe) 79840080.00 root eq(test.t2.a, test.t1.a)", + " └─IndexReader 99800100.00 root 
index:IndexFullScan", + " └─IndexFullScan 99800100.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)", + "Plan": [ + "Apply 10000.00 root CARTESIAN anti semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Selection(Probe) 80000000.00 root eq(test.t2.a, test.t1.a)", + " └─IndexReader 100000000.00 root index:IndexFullScan", + " └─IndexFullScan 100000000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + ], + "Result": [ + "3 3" + ] + } + ] + } +] diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json new file mode 100644 index 0000000000000..9cc59e02bb954 --- /dev/null +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -0,0 +1,37 @@ +[ + { + "Name": "TestCorrelate", + "Cases": [ + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", + "Plan": [ + "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Selection(Probe) 79840080.00 root eq(test.t2.a, test.t1.a)", + " └─IndexReader 99800100.00 root index:IndexFullScan", + " └─IndexFullScan 99800100.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)", + "Plan": [ + "Apply 10000.00 root CARTESIAN anti semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep 
order:false, stats:pseudo", + "└─Selection(Probe) 80000000.00 root eq(test.t2.a, test.t1.a)", + " └─IndexReader 100000000.00 root index:IndexFullScan", + " └─IndexFullScan 100000000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + ], + "Result": [ + "3 3" + ] + } + ] + } +] diff --git a/pkg/planner/core/optimizer.go b/pkg/planner/core/optimizer.go index 1f562514175be..bc9a2f91e5c69 100644 --- a/pkg/planner/core/optimizer.go +++ b/pkg/planner/core/optimizer.go @@ -110,6 +110,7 @@ var optRuleList = []base.LogicalOptRule{ &EliminateUnionAllDualItem{}, &EmptySelectionEliminator{}, &ResolveExpand{}, + &CorrelateSolver{}, } // Interaction Rule List @@ -363,6 +364,9 @@ func adjustOptimizationFlags(flag uint64, logic base.LogicalPlan) uint64 { if !logic.SCtx().GetSessionVars().StmtCtx.UseDynamicPruneMode { flag |= rule.FlagPartitionProcessor // apply partition pruning under static mode } + if logic.SCtx().GetSessionVars().EnableCorrelateSubquery { + flag |= rule.FlagCorrelate + } return flag } diff --git a/pkg/planner/core/rule/logical_rules.go b/pkg/planner/core/rule/logical_rules.go index 2c82cbde30b48..6a70c3bc0885a 100644 --- a/pkg/planner/core/rule/logical_rules.go +++ b/pkg/planner/core/rule/logical_rules.go @@ -45,6 +45,7 @@ const ( FlagEliminateUnionAllDualItem FlagEmptySelectionEliminator FlagResolveExpand + FlagCorrelate ) func setPredicatePushDownFlag(u uint64) uint64 { diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go new file mode 100644 index 0000000000000..d10683f5d465f --- /dev/null +++ b/pkg/planner/core/rule_correlate.go @@ -0,0 +1,157 @@ +// Copyright 2025 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "context" + + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/planner/core/base" + "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" + "github.com/pingcap/tidb/pkg/types" +) + +// CorrelateSolver tries to convert semi-join LogicalJoin back to correlated LogicalApply. +// This is the reverse of DecorrelateSolver and is useful when a correlated nested-loop +// (index lookup per outer row) might be more efficient than a hash semi-join. +type CorrelateSolver struct{} + +// Optimize implements base.LogicalOptRule.<0th> interface. +func (s *CorrelateSolver) Optimize(ctx context.Context, p base.LogicalPlan) (base.LogicalPlan, bool, error) { + return s.correlate(ctx, p) +} + +func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (base.LogicalPlan, bool, error) { + // CTE's logical optimization is independent. + if _, ok := p.(*logicalop.LogicalCTE); ok { + return p, false, nil + } + + // First recurse into children. + planChanged := false + newChildren := make([]base.LogicalPlan, 0, len(p.Children())) + for _, child := range p.Children() { + np, changed, err := s.correlate(ctx, child) + if err != nil { + return nil, false, err + } + planChanged = planChanged || changed + newChildren = append(newChildren, np) + } + p.SetChildren(newChildren...) + + // Check if this node is a LogicalApply — if so, skip (already correlated). 
+ if _, isApply := p.(*logicalop.LogicalApply); isApply { + return p, planChanged, nil + } + + // Check if this node is a LogicalJoin with a semi-join type. + join, isJoin := p.(*logicalop.LogicalJoin) + if !isJoin || !join.JoinType.IsSemiJoin() { + return p, planChanged, nil + } + + // Must have EqualConditions to correlate (skip if only NAEQConditions). + if len(join.EqualConditions) == 0 { + return p, planChanged, nil + } + + // For v1: skip null-aware conditions, LeftConditions, and OtherConditions. + if len(join.NAEQConditions) > 0 || len(join.LeftConditions) > 0 || len(join.OtherConditions) > 0 { + return p, planChanged, nil + } + + leftSchema := join.Children()[0].Schema() + rightSchema := join.Children()[1].Schema() + + var selConds []expression.Expression + var corCols []*expression.CorrelatedColumn + + // Convert EqualConditions to correlated conditions. + for _, eqCond := range join.EqualConditions { + cond, corCol := s.buildCorrelatedCond(eqCond, leftSchema, rightSchema, join) + if cond == nil { + // Can't correlate this condition; abort. + return p, planChanged, nil + } + selConds = append(selConds, cond) + corCols = append(corCols, corCol) + } + + // Move RightConditions to the selection (they reference only the inner side). + selConds = append(selConds, join.RightConditions...) + + // Build the LogicalSelection on the inner (right) child. + innerChild := join.Children()[1] + sel := logicalop.LogicalSelection{Conditions: selConds}.Init(join.SCtx(), join.QueryBlockOffset()) + sel.SetChildren(innerChild) + + // Build the LogicalApply. + ap := logicalop.LogicalApply{}.Init(join.SCtx(), join.QueryBlockOffset()) + ap.JoinType = join.JoinType + ap.CorCols = corCols + ap.SetChildren(join.Children()[0], sel) + ap.SetSchema(join.Schema().Clone()) + ap.SetOutputNames(join.OutputNames()) + + return ap, true, nil +} + +// buildCorrelatedCond converts an equal condition from the join into a correlated condition +// for the inner selection. 
It identifies which column comes from the left (outer) side and +// creates a CorrelatedColumn for it, then builds a new condition: rightCol CorCol(leftCol). +func (*CorrelateSolver) buildCorrelatedCond( + eqCond *expression.ScalarFunction, + leftSchema *expression.Schema, + rightSchema *expression.Schema, + join *logicalop.LogicalJoin, +) (expression.Expression, *expression.CorrelatedColumn) { + col0, col1, ok := expression.IsColOpCol(eqCond) + if !ok { + return nil, nil + } + + // Determine which column is from the left (outer) side and which from the right (inner). + leftCol := leftSchema.RetrieveColumn(col0) + rightCol := rightSchema.RetrieveColumn(col1) + if leftCol == nil || rightCol == nil { + // Try swapped order. + leftCol = leftSchema.RetrieveColumn(col1) + rightCol = rightSchema.RetrieveColumn(col0) + } + if leftCol == nil || rightCol == nil { + return nil, nil + } + + // Create a CorrelatedColumn for the outer (left) column. + // Data must be initialized (non-nil) to avoid panics during physical planning. + corCol := &expression.CorrelatedColumn{Column: *leftCol, Data: new(types.Datum)} + + // Create the correlated condition: rightCol CorCol(leftCol). + cond := expression.NewFunctionInternal( + join.SCtx().GetExprCtx(), + eqCond.FuncName.L, + types.NewFieldType(mysql.TypeTiny), + rightCol, corCol, + ) + + return cond, corCol +} + +// Name implements base.LogicalOptRule.<1st> interface. +func (*CorrelateSolver) Name() string { + return "correlate" +} diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index d89f720d09721..8aa9e5e488a1b 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -338,6 +338,10 @@ const ( // inner-join with aggregation (equivalent to SEMI_JOIN_REWRITE() hint). 
TiDBOptEnableSemiJoinRewrite = "tidb_opt_enable_semi_join_rewrite" + // TiDBOptEnableCorrelateSubquery controls conversion of non-correlated semi-joins + // back to correlated Apply (reverse of decorrelation). + TiDBOptEnableCorrelateSubquery = "tidb_opt_enable_correlate_subquery" + // TiDBOptEnableCorrelationAdjustment is used to indicates if enable correlation adjustment. TiDBOptEnableCorrelationAdjustment = "tidb_opt_enable_correlation_adjustment" @@ -1451,6 +1455,7 @@ const ( DefOptPreferRangeScan = true DefOptEnableNoDecorrelateInSelect = false DefOptEnableSemiJoinRewrite = false + DefOptEnableCorrelateSubquery = false DefBatchInsert = false DefBatchDelete = false DefBatchCommit = false diff --git a/pkg/sessionctx/variable/session.go b/pkg/sessionctx/variable/session.go index e26a0f85fc8ee..3dd81d737b22b 100644 --- a/pkg/sessionctx/variable/session.go +++ b/pkg/sessionctx/variable/session.go @@ -1162,6 +1162,9 @@ type SessionVars struct { // EnableSemiJoinRewrite enables the SEMI_JOIN_REWRITE hint for subqueries in the where clause. EnableSemiJoinRewrite bool + // EnableCorrelateSubquery enables conversion of non-correlated semi-joins to correlated Apply. + EnableCorrelateSubquery bool + // AllowProjectionPushDown enables pushdown projection on TiKV. 
AllowProjectionPushDown bool @@ -2280,6 +2283,7 @@ func NewSessionVars(hctx HookContext) *SessionVars { EnableOuterJoinReorder: vardef.DefTiDBEnableOuterJoinReorder, EnableNoDecorrelateInSelect: vardef.DefOptEnableNoDecorrelateInSelect, EnableSemiJoinRewrite: vardef.DefOptEnableSemiJoinRewrite, + EnableCorrelateSubquery: vardef.DefOptEnableCorrelateSubquery, RetryLimit: vardef.DefTiDBRetryLimit, DisableTxnAutoRetry: vardef.DefTiDBDisableTxnAutoRetry, DDLReorgPriority: kv.PriorityLow, diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index 3bd2647e47496..17d73a8641782 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -342,6 +342,10 @@ var defaultSysVars = []*SysVar{ s.EnableSemiJoinRewrite = TiDBOptOn(val) return nil }}, + {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptEnableCorrelateSubquery, Value: BoolToOnOff(vardef.DefOptEnableCorrelateSubquery), Type: vardef.TypeBool, SetSession: func(s *SessionVars, val string) error { + s.EnableCorrelateSubquery = TiDBOptOn(val) + return nil + }}, {Scope: vardef.ScopeSession, Name: vardef.TiDBDDLReorgPriority, Value: "PRIORITY_LOW", Type: vardef.TypeEnum, skipInit: true, PossibleValues: []string{"PRIORITY_LOW", "PRIORITY_NORMAL", "PRIORITY_HIGH"}, SetSession: func(s *SessionVars, val string) error { s.setDDLReorgPriority(val) return nil From 171333afeb1876846f7c07ae4e77ab801b990540 Mon Sep 17 00:00:00 2001 From: tpp Date: Wed, 11 Feb 2026 06:33:06 -0800 Subject: [PATCH 02/24] review comments1 --- pkg/planner/core/BUILD.bazel | 1 + pkg/planner/core/casetest/rule/BUILD.bazel | 3 ++- .../rule/testdata/correlate_suite_out.json | 10 ++++------ .../rule/testdata/correlate_suite_xut.json | 10 ++++------ pkg/planner/core/optimizer.go | 15 +++++++++++---- pkg/planner/core/optimizer_test.go | 17 +++++++++++++++++ pkg/planner/core/rule_correlate.go | 16 +++++++++++++--- pkg/sessionctx/vardef/tidb_vars.go | 2 +- 8 files changed, 53 
insertions(+), 21 deletions(-) diff --git a/pkg/planner/core/BUILD.bazel b/pkg/planner/core/BUILD.bazel index df7c8ec4808a0..abd300503afd2 100644 --- a/pkg/planner/core/BUILD.bazel +++ b/pkg/planner/core/BUILD.bazel @@ -49,6 +49,7 @@ go_library( "rule_aggregation_elimination.go", "rule_aggregation_push_down.go", "rule_aggregation_skew_rewrite.go", + "rule_correlate.go", "rule_decorrelate.go", "rule_derive_topn_from_window.go", "rule_eliminate_empty_selection.go", diff --git a/pkg/planner/core/casetest/rule/BUILD.bazel b/pkg/planner/core/casetest/rule/BUILD.bazel index 08693ffe1908b..fb25a5a45bc5c 100644 --- a/pkg/planner/core/casetest/rule/BUILD.bazel +++ b/pkg/planner/core/casetest/rule/BUILD.bazel @@ -6,6 +6,7 @@ go_test( srcs = [ "dual_test.go", "main_test.go", + "rule_correlate_test.go", "rule_derive_topn_from_window_test.go", "rule_eliminate_empty_selection_test.go", "rule_eliminate_projection_test.go", @@ -18,7 +19,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 18, + shard_count = 19, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index 9cc59e02bb954..03041bb419950 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -9,9 +9,8 @@ "├─TableReader(Build) 9990.00 root data:Selection", "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Selection(Probe) 79840080.00 root eq(test.t2.a, test.t1.a)", - " └─IndexReader 99800100.00 root index:IndexFullScan", - " └─IndexFullScan 99800100.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + "└─IndexReader(Probe) 99900.00 root index:IndexRangeScan", + " └─IndexRangeScan 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], 
keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -24,9 +23,8 @@ "Apply 10000.00 root CARTESIAN anti semi join, left side:TableReader", "├─TableReader(Build) 10000.00 root data:TableFullScan", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Selection(Probe) 80000000.00 root eq(test.t2.a, test.t1.a)", - " └─IndexReader 100000000.00 root index:IndexFullScan", - " └─IndexFullScan 100000000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + "└─IndexReader(Probe) 100000.00 root index:IndexRangeScan", + " └─IndexRangeScan 100000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "3 3" diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index 9cc59e02bb954..03041bb419950 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -9,9 +9,8 @@ "├─TableReader(Build) 9990.00 root data:Selection", "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Selection(Probe) 79840080.00 root eq(test.t2.a, test.t1.a)", - " └─IndexReader 99800100.00 root index:IndexFullScan", - " └─IndexFullScan 99800100.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + "└─IndexReader(Probe) 99900.00 root index:IndexRangeScan", + " └─IndexRangeScan 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -24,9 +23,8 @@ "Apply 10000.00 root CARTESIAN anti semi join, left side:TableReader", "├─TableReader(Build) 10000.00 root data:TableFullScan", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Selection(Probe) 80000000.00 root eq(test.t2.a, test.t1.a)", - " 
└─IndexReader 100000000.00 root index:IndexFullScan", - " └─IndexFullScan 100000000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + "└─IndexReader(Probe) 100000.00 root index:IndexRangeScan", + " └─IndexRangeScan 100000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "3 3" diff --git a/pkg/planner/core/optimizer.go b/pkg/planner/core/optimizer.go index bc9a2f91e5c69..21c73b49c220e 100644 --- a/pkg/planner/core/optimizer.go +++ b/pkg/planner/core/optimizer.go @@ -345,10 +345,6 @@ func VolcanoOptimize(ctx context.Context, sctx base.PlanContext, flag uint64, lo } func adjustOptimizationFlags(flag uint64, logic base.LogicalPlan) uint64 { - // If there is something after flagPrunColumns, do FlagPruneColumnsAgain. - if flag&rule.FlagPruneColumns > 0 && flag-rule.FlagPruneColumns > rule.FlagPruneColumns { - flag |= rule.FlagPruneColumnsAgain - } if checkStableResultMode(logic.SCtx()) { flag |= rule.FlagStabilizeResults } @@ -367,6 +363,17 @@ func adjustOptimizationFlags(flag uint64, logic base.LogicalPlan) uint64 { if logic.SCtx().GetSessionVars().EnableCorrelateSubquery { flag |= rule.FlagCorrelate } + // Recompute FlagPruneColumnsAgain after all conditional flag mutations so + // that conditionally-added flags (FlagCorrelate, FlagPartitionProcessor, …) + // are taken into account. A second column-prune pass is worthwhile when + // any rule above column pruning is enabled. + if flag&rule.FlagPruneColumns != 0 { + // Mask of all flag bits strictly above FlagPruneColumns. 
+ const abovePruneColumns = ^(rule.FlagPruneColumns | (rule.FlagPruneColumns - 1)) + if flag&abovePruneColumns != 0 { + flag |= rule.FlagPruneColumnsAgain + } + } return flag } diff --git a/pkg/planner/core/optimizer_test.go b/pkg/planner/core/optimizer_test.go index 90a7837b759dc..07c6d21e603c9 100644 --- a/pkg/planner/core/optimizer_test.go +++ b/pkg/planner/core/optimizer_test.go @@ -15,6 +15,7 @@ package core import ( + "math/bits" "reflect" "strings" "testing" @@ -27,6 +28,7 @@ import ( "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/planner/core/base" "github.com/pingcap/tidb/pkg/planner/core/operator/physicalop" + "github.com/pingcap/tidb/pkg/planner/core/rule" "github.com/pingcap/tidb/pkg/planner/property" "github.com/pingcap/tidb/pkg/planner/util/coretestsdk" "github.com/pingcap/tidb/pkg/types" @@ -455,3 +457,18 @@ func TestCanTiFlashUseHashJoinV2(t *testing.T) { // can not use hash join v2 due to null eq require.False(t, hashJoin.CanTiFlashUseHashJoinV2(sctx)) } + +func TestOptRuleListFlagAlignment(t *testing.T) { + // Each position i in optRuleList is gated by the flag bit 1< Date: Wed, 11 Feb 2026 07:59:31 -0800 Subject: [PATCH 03/24] add testcases1 --- .../rule/testdata/correlate_suite_in.json | 8 ++- .../rule/testdata/correlate_suite_out.json | 59 +++++++++++++++++++ .../rule/testdata/correlate_suite_xut.json | 59 +++++++++++++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json index ebb140c6d8870..4668c381ded8c 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json @@ -3,7 +3,13 @@ "name": "TestCorrelate", "cases": [ "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", - "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)" + "select * from t1 where not 
exists (select 1 from t2 where t2.a = t1.a)", + "select * from t1 where a in (select a from t2)", + "select * from t1 where exists (select 1 from t2)", + "select * from t1 where a not in (select a from t2)", + "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", + "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", + "select * from t1 where exists (select /*+ NO_DECORRELATE() */ 1 from t2 where t2.a = t1.a)" ] } ] diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index 03041bb419950..a46a6cc7a169f 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -29,6 +29,65 @@ "Result": [ "3 3" ] + }, + { + "SQL": "select * from t1 where a in (select a from t2)", + "Plan": [ + "HashJoin 9990.00 root inner join, equal:[eq(test.t1.a, test.t2.a)]", + "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─IndexReader 7992.00 root index:StreamAgg", + "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2)", + "Plan": [ + "TableReader 10000.00 root data:TableFullScan", + "└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "ScalarSubQuery N/A root Output: ScalarQueryCol#10, ScalarQueryCol#11, ScalarQueryCol#12, ScalarQueryCol#13", + "└─TableReader 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", 
+ "2 2", + "3 3" + ] + }, + { + "SQL": "select * from t1 where a not in (select a from t2)", + "Plan": [ + "HashJoin 8000.00 root Null-aware anti semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─IndexReader(Build) 10000.00 root index:IndexFullScan", + "│ └─IndexFullScan 10000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "3 3" + ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", + "Plan": [ + "HashJoin 7992.00 root CARTESIAN semi join, left side:TableReader, other cond:gt(test.t2.a, test.t1.a)", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1" + ] } ] } diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index 03041bb419950..a46a6cc7a169f 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -29,6 +29,65 @@ "Result": [ "3 3" ] + }, + { + "SQL": "select * from t1 where a in (select a from t2)", + "Plan": [ + "HashJoin 9990.00 root inner join, equal:[eq(test.t1.a, test.t2.a)]", + "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─IndexReader 7992.00 root index:StreamAgg", + "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " 
└─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2)", + "Plan": [ + "TableReader 10000.00 root data:TableFullScan", + "└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "ScalarSubQuery N/A root Output: ScalarQueryCol#10, ScalarQueryCol#11, ScalarQueryCol#12, ScalarQueryCol#13", + "└─TableReader 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2", + "3 3" + ] + }, + { + "SQL": "select * from t1 where a not in (select a from t2)", + "Plan": [ + "HashJoin 8000.00 root Null-aware anti semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─IndexReader(Build) 10000.00 root index:IndexFullScan", + "│ └─IndexFullScan 10000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "3 3" + ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", + "Plan": [ + "HashJoin 7992.00 root CARTESIAN semi join, left side:TableReader, other cond:gt(test.t2.a, test.t1.a)", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1" + ] } ] } From 1d7e38a08f910bcc150266fb26d02ee7c10d4614 Mon Sep 17 00:00:00 2001 From: tpp Date: Wed, 11 Feb 2026 08:12:44 -0800 Subject: [PATCH 04/24] add testcases2 --- .../rule/testdata/correlate_suite_out.json | 32 
+++++++++++++++++++ .../rule/testdata/correlate_suite_xut.json | 32 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index a46a6cc7a169f..a95ad0e4fbf0f 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -88,6 +88,38 @@ "Result": [ "1 1" ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", + "Plan": [ + "HashJoin 7984.01 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)], other cond:gt(test.t2.b, test.t1.b)", + "├─TableReader(Build) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9980.01 root data:Selection", + " └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where exists (select /*+ NO_DECORRELATE() */ 1 from t2 where t2.a = t1.a)", + "Plan": [ + "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexReader 10000.00 root index:Limit", + " └─Limit 10000.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] } ] } diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json 
b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index a46a6cc7a169f..a95ad0e4fbf0f 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -88,6 +88,38 @@ "Result": [ "1 1" ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", + "Plan": [ + "HashJoin 7984.01 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)], other cond:gt(test.t2.b, test.t1.b)", + "├─TableReader(Build) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9980.01 root data:Selection", + " └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where exists (select /*+ NO_DECORRELATE() */ 1 from t2 where t2.a = t1.a)", + "Plan": [ + "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexReader 10000.00 root index:Limit", + " └─Limit 10000.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] } ] } From 62df8f804eb60f5ad2378e4f59e5fd0bd7767930 Mon Sep 17 00:00:00 2001 From: tpp Date: Thu, 19 Feb 2026 09:01:34 -0800 Subject: [PATCH 05/24] refactor for order --- .../core/casetest/rule/rule_correlate_test.go | 4 +- .../rule/testdata/correlate_suite_in.json | 4 +- .../rule/testdata/correlate_suite_out.json | 53 
++++++++++++++++--- .../rule/testdata/correlate_suite_xut.json | 53 ++++++++++++++++--- pkg/planner/core/exhaust_physical_plans.go | 6 ++- pkg/planner/core/expression_rewriter.go | 5 +- pkg/planner/core/rule_correlate.go | 44 +++++++++++++++ 7 files changed, 149 insertions(+), 20 deletions(-) diff --git a/pkg/planner/core/casetest/rule/rule_correlate_test.go b/pkg/planner/core/casetest/rule/rule_correlate_test.go index 81c5c670eb494..1aee8185b9202 100644 --- a/pkg/planner/core/casetest/rule/rule_correlate_test.go +++ b/pkg/planner/core/casetest/rule/rule_correlate_test.go @@ -24,11 +24,13 @@ import ( func TestCorrelate(tt *testing.T) { testkit.RunTestUnderCascades(tt, func(t *testing.T, tk *testkit.TestKit, cascades, caller string) { tk.MustExec("use test") - tk.MustExec("drop table if exists t1, t2") + tk.MustExec("drop table if exists t1, t2, t3") tk.MustExec("create table t1 (a int, b int, key(a))") tk.MustExec("create table t2 (a int, b int, key(a))") + tk.MustExec("create table t3 (a int, b int, key(a))") tk.MustExec("insert into t1 values (1,1),(2,2),(3,3)") tk.MustExec("insert into t2 values (1,10),(2,20)") + tk.MustExec("insert into t3 values (10,1),(20,2)") // Enable the correlate rule. 
tk.MustExec("set tidb_opt_enable_correlate_subquery = ON") diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json index 4668c381ded8c..38d48f872c1e5 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json @@ -9,7 +9,9 @@ "select * from t1 where a not in (select a from t2)", "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", - "select * from t1 where exists (select /*+ NO_DECORRELATE() */ 1 from t2 where t2.a = t1.a)" + "select * from t1 where exists (select /*+ NO_DECORRELATE() */ 1 from t2 where t2.a = t1.a)", + "select * from t1 where a in (select t2.a from t2 inner join t3 on t3.a = t2.b where t3.b > 0)", + "select * from t1 where a in (select a from t2) order by a limit 10" ] } ] diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index a95ad0e4fbf0f..9ec6d2fdc6b17 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -33,14 +33,12 @@ { "SQL": "select * from t1 where a in (select a from t2)", "Plan": [ - "HashJoin 9990.00 root inner join, equal:[eq(test.t1.a, test.t2.a)]", - "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", - "│ └─IndexReader 7992.00 root index:StreamAgg", - "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", - "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", - "└─TableReader(Probe) 9990.00 root data:Selection", - " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + "Apply 9990.00 root CARTESIAN semi join, 
left side:TableReader", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexReader(Probe) 99900.00 root index:IndexRangeScan", + " └─IndexRangeScan 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -120,6 +118,45 @@ "1 1", "2 2" ] + }, + { + "SQL": "select * from t1 where a in (select t2.a from t2 inner join t3 on t3.a = t2.b where t3.b > 0)", + "Plan": [ + "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexHashJoin(Probe) 124750.12 root inner join, inner:IndexLookUp, outer key:test.t2.b, inner key:test.t3.a, equal cond:eq(test.t2.b, test.t3.a)", + " ├─IndexLookUp(Build) 99800.10 root ", + " │ ├─IndexRangeScan(Build) 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " │ └─Selection(Probe) 99800.10 cop[tikv] not(isnull(test.t2.b))", + " │ └─TableRowIDScan 99900.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─IndexLookUp(Probe) 124750.12 root ", + " ├─Selection(Build) 124750.12 cop[tikv] not(isnull(test.t3.a))", + " │ └─IndexRangeScan 124875.00 cop[tikv] table:t3, index:a(a) range: decided by [eq(test.t3.a, test.t2.b)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 124750.12 cop[tikv] table:t3 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2) order by a limit 10", + "Plan": [ + "Limit 10.00 root offset:0, count:10", + "└─Apply 10.00 root CARTESIAN semi join, left side:Projection", + " ├─Projection(Build) 10.00 root test.t1.a, 
test.t1.b", + " │ └─IndexLookUp 10.00 root ", + " │ ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", + " │ └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─IndexReader(Probe) 100.00 root index:IndexRangeScan", + " └─IndexRangeScan 100.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] } ] } diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index a95ad0e4fbf0f..9ec6d2fdc6b17 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -33,14 +33,12 @@ { "SQL": "select * from t1 where a in (select a from t2)", "Plan": [ - "HashJoin 9990.00 root inner join, equal:[eq(test.t1.a, test.t2.a)]", - "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", - "│ └─IndexReader 7992.00 root index:StreamAgg", - "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", - "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", - "└─TableReader(Probe) 9990.00 root data:Selection", - " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexReader(Probe) 99900.00 root index:IndexRangeScan", + " └─IndexRangeScan 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -120,6 +118,45 @@ "1 1", "2 2" ] + }, + { + "SQL": "select 
* from t1 where a in (select t2.a from t2 inner join t3 on t3.a = t2.b where t3.b > 0)", + "Plan": [ + "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexHashJoin(Probe) 124750.12 root inner join, inner:IndexLookUp, outer key:test.t2.b, inner key:test.t3.a, equal cond:eq(test.t2.b, test.t3.a)", + " ├─IndexLookUp(Build) 99800.10 root ", + " │ ├─IndexRangeScan(Build) 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " │ └─Selection(Probe) 99800.10 cop[tikv] not(isnull(test.t2.b))", + " │ └─TableRowIDScan 99900.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─IndexLookUp(Probe) 124750.12 root ", + " ├─Selection(Build) 124750.12 cop[tikv] not(isnull(test.t3.a))", + " │ └─IndexRangeScan 124875.00 cop[tikv] table:t3, index:a(a) range: decided by [eq(test.t3.a, test.t2.b)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 124750.12 cop[tikv] table:t3 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2) order by a limit 10", + "Plan": [ + "Limit 10.00 root offset:0, count:10", + "└─Apply 10.00 root CARTESIAN semi join, left side:Projection", + " ├─Projection(Build) 10.00 root test.t1.a, test.t1.b", + " │ └─IndexLookUp 10.00 root ", + " │ ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", + " │ └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─IndexReader(Probe) 100.00 root index:IndexRangeScan", + " └─IndexRangeScan 100.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] } ] } diff --git 
a/pkg/planner/core/exhaust_physical_plans.go b/pkg/planner/core/exhaust_physical_plans.go index 237dee64a526e..8b20824a98a1e 100644 --- a/pkg/planner/core/exhaust_physical_plans.go +++ b/pkg/planner/core/exhaust_physical_plans.go @@ -2878,6 +2878,10 @@ func exhaustPhysicalPlans4LogicalApply(super base.LogicalPlan, prop *property.Ph canUseCache = false } + outerExpectedCnt := math.MaxFloat64 + if !prop.IsSortItemEmpty() { + outerExpectedCnt = prop.ExpectedCnt + } apply := physicalop.PhysicalApply{ PhysicalHashJoin: *join, OuterSchema: la.CorCols, @@ -2885,7 +2889,7 @@ func exhaustPhysicalPlans4LogicalApply(super base.LogicalPlan, prop *property.Ph }.Init(la.SCtx(), la.StatsInfo().ScaleByExpectCnt(la.SCtx().GetSessionVars(), prop.ExpectedCnt), la.QueryBlockOffset(), - &property.PhysicalProperty{ExpectedCnt: math.MaxFloat64, SortItems: prop.SortItems, CTEProducerStatus: prop.CTEProducerStatus, NoCopPushDown: true}, + &property.PhysicalProperty{ExpectedCnt: outerExpectedCnt, SortItems: prop.SortItems, CTEProducerStatus: prop.CTEProducerStatus, NoCopPushDown: true}, &property.PhysicalProperty{ExpectedCnt: math.MaxFloat64, CTEProducerStatus: prop.CTEProducerStatus, NoCopPushDown: prop.NoCopPushDown}) apply.SetSchema(la.Schema()) return []base.PhysicalPlan{apply}, true, nil diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index ef02666c31001..f5e189c355d72 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1285,7 +1285,10 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp // and has no correlated column from the current level plan(if the correlated column is from upper level, // we can treat it as constant, because the upper LogicalApply cannot be eliminated since current node is a join node), // and don't need to append a scalar value, we can rewrite it to inner join. 
- if planCtx.builder.ctx.GetSessionVars().GetAllowInSubqToJoinAndAgg() && !v.Not && !asScalar && len(corCols) == 0 && collFlag { + // When EnableCorrelateSubquery is ON, skip the InnerJoin+Agg rewrite so that a SemiJoin is built + // instead; the CorrelateSolver rule can then convert it to a correlated Apply with index lookups. + if planCtx.builder.ctx.GetSessionVars().GetAllowInSubqToJoinAndAgg() && !v.Not && !asScalar && len(corCols) == 0 && collFlag && + !planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { // We need to try to eliminate the agg and the projection produced by this operation. planCtx.builder.optFlag |= rule.FlagEliminateAgg planCtx.builder.optFlag |= rule.FlagEliminateProjection diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index e8fbb9650ba72..bc0cdfe69982c 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -109,6 +109,12 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba return nil, false, err } + // Reset stats on DataSources that received correlated conditions so DeriveStats + // re-runs during physical optimization. This is necessary because the original + // DeriveStats ran before the correlate rule added correlated conditions, so the + // index access paths were built without them. + resetStatsForCorrelatedDS(innerPlan) + // Build the LogicalApply. ap := logicalop.LogicalApply{}.Init(join.SCtx(), join.QueryBlockOffset()) ap.JoinType = join.JoinType @@ -161,6 +167,44 @@ func (*CorrelateSolver) buildCorrelatedCond( return cond, corCol } +// resetStatsForCorrelatedDS walks the inner subtree and clears StatsInfo on +// DataSources that have correlated conditions in AllConds, plus all ancestor +// plan nodes up to the root. This forces DeriveStats to re-run during physical +// optimization so that index access paths are rebuilt with the correlated +// conditions. 
Only DataSources with correlated conditions are reset to avoid +// issues with other DataSources that had their conditions overwritten by the +// second PPD pass. +func resetStatsForCorrelatedDS(p base.LogicalPlan) bool { + hasCorrelated := false + + // Check if this is a DataSource with correlated conditions. + if ds, ok := p.(*logicalop.DataSource); ok { + for _, cond := range ds.AllConds { + if len(expression.ExtractCorColumns(cond)) > 0 { + hasCorrelated = true + break + } + } + } + + // Recurse into children. + for _, child := range p.Children() { + if resetStatsForCorrelatedDS(child) { + hasCorrelated = true + } + } + + // Reset stats on this node if it or any descendant has correlated conditions. + // This ensures DeriveStats re-runs for the affected subtree path. + if hasCorrelated { + if blp, ok := p.GetBaseLogicalPlan().(*logicalop.BaseLogicalPlan); ok { + blp.SetStats(nil) + } + } + + return hasCorrelated +} + // Name implements base.LogicalOptRule.<1st> interface. func (*CorrelateSolver) Name() string { return "correlate" From 5ba3ccaaaad1a6e1c29c808668d11f3849bf86e2 Mon Sep 17 00:00:00 2001 From: tpp Date: Thu, 19 Feb 2026 13:58:55 -0800 Subject: [PATCH 06/24] refactor for cost based evaluation --- pkg/planner/core/casetest/rule/BUILD.bazel | 4 +- pkg/planner/core/casetest/rule/main_test.go | 2 +- .../rule/testdata/correlate_suite_out.json | 85 ++++---- .../rule/testdata/correlate_suite_xut.json | 85 ++++---- pkg/planner/core/core_init.go | 1 + pkg/planner/core/expression_rewriter.go | 19 +- pkg/planner/core/find_best_task.go | 96 +++++++++ .../core/operator/logicalop/logical_join.go | 9 + .../operator/physicalop/base_physical_plan.go | 5 + pkg/planner/core/rule_correlate.go | 197 +++++++++++++++++- .../util/utilfuncp/func_pointer_misc.go | 4 + 11 files changed, 420 insertions(+), 87 deletions(-) diff --git a/pkg/planner/core/casetest/rule/BUILD.bazel b/pkg/planner/core/casetest/rule/BUILD.bazel index 4369b89a04ef6..1943a51d4993b 100644 --- 
a/pkg/planner/core/casetest/rule/BUILD.bazel +++ b/pkg/planner/core/casetest/rule/BUILD.bazel @@ -6,8 +6,8 @@ go_test( srcs = [ "dual_test.go", "main_test.go", - "rule_correlate_test.go", "rule_cdc_join_reorder_test.go", + "rule_correlate_test.go", "rule_derive_topn_from_window_test.go", "rule_eliminate_empty_selection_test.go", "rule_eliminate_projection_test.go", @@ -20,7 +20,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 19, + shard_count = 20, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/core/casetest/rule/main_test.go b/pkg/planner/core/casetest/rule/main_test.go index af2a1bce41dbf..3bcb4504ec267 100644 --- a/pkg/planner/core/casetest/rule/main_test.go +++ b/pkg/planner/core/casetest/rule/main_test.go @@ -84,6 +84,6 @@ func GetCorrelateSuiteData() testdata.TestData { return testDataMap["correlate_suite"] } - func GetCDCJoinReorderSuiteData() testdata.TestData { +func GetCDCJoinReorderSuiteData() testdata.TestData { return testDataMap["cdc_join_reorder_suite"] } diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index 9ec6d2fdc6b17..92fef60462532 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -5,12 +5,13 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", "Plan": [ - "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 9990.00 root data:Selection", - "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─IndexReader(Probe) 99900.00 root index:IndexRangeScan", - " └─IndexRangeScan 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", 
+ "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexReader 10000.00 root index:Limit", + " └─Limit 10000.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -23,8 +24,10 @@ "Apply 10000.00 root CARTESIAN anti semi join, left side:TableReader", "├─TableReader(Build) 10000.00 root data:TableFullScan", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─IndexReader(Probe) 100000.00 root index:IndexRangeScan", - " └─IndexRangeScan 100000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexReader 10000.00 root index:Limit", + " └─Limit 10000.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "3 3" @@ -37,8 +40,10 @@ "├─TableReader(Build) 9990.00 root data:Selection", "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─IndexReader(Probe) 99900.00 root index:IndexRangeScan", - " └─IndexRangeScan 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "└─Limit(Probe) 9990.00 root offset:0, count:1", + " └─IndexReader 9990.00 root index:Limit", + " └─Limit 9990.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 9990.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -76,12 +81,14 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a > 
t1.a)", "Plan": [ - "HashJoin 7992.00 root CARTESIAN semi join, left side:TableReader, other cond:gt(test.t2.a, test.t1.a)", - "├─IndexReader(Build) 9990.00 root index:IndexFullScan", - "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", - "└─TableReader(Probe) 9990.00 root data:Selection", - " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexReader 10000.00 root index:Limit", + " └─Limit 10000.00 cop[tikv] offset:0, count:1", + " └─Selection 809900.00 cop[tikv] gt(test.t2.a, test.t1.a)", + " └─IndexFullScan 1012375.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" ], "Result": [ "1 1" @@ -90,13 +97,15 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", "Plan": [ - "HashJoin 7984.01 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)], other cond:gt(test.t2.b, test.t1.b)", - "├─TableReader(Build) 9980.01 root data:Selection", - "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", - "└─TableReader(Probe) 9980.01 root data:Selection", - " └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", - " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexLookUp 10000.00 root ", + " 
├─IndexRangeScan(Build) 13375.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " └─Limit(Probe) 10000.00 cop[tikv] offset:0, count:1", + " └─Selection 10000.00 cop[tikv] gt(test.t2.b, test.t1.b)", + " └─TableRowIDScan 13375.00 cop[tikv] table:t2 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -122,19 +131,17 @@ { "SQL": "select * from t1 where a in (select t2.a from t2 inner join t3 on t3.a = t2.b where t3.b > 0)", "Plan": [ - "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 9990.00 root data:Selection", - "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─IndexHashJoin(Probe) 124750.12 root inner join, inner:IndexLookUp, outer key:test.t2.b, inner key:test.t3.a, equal cond:eq(test.t2.b, test.t3.a)", - " ├─IndexLookUp(Build) 99800.10 root ", - " │ ├─IndexRangeScan(Build) 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", - " │ └─Selection(Probe) 99800.10 cop[tikv] not(isnull(test.t2.b))", - " │ └─TableRowIDScan 99900.00 cop[tikv] table:t2 keep order:false, stats:pseudo", - " └─IndexLookUp(Probe) 124750.12 root ", - " ├─Selection(Build) 124750.12 cop[tikv] not(isnull(test.t3.a))", - " │ └─IndexRangeScan 124875.00 cop[tikv] table:t3, index:a(a) range: decided by [eq(test.t3.a, test.t2.b)], keep order:false, stats:pseudo", - " └─TableRowIDScan(Probe) 124750.12 cop[tikv] table:t3 keep order:false, stats:pseudo" + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─HashJoin(Build) 4162.50 root inner join, equal:[eq(test.t3.a, test.t2.b)]", + "│ ├─TableReader(Build) 3330.00 root data:Selection", + "│ │ └─Selection 3330.00 cop[tikv] gt(test.t3.b, 0), not(isnull(test.t3.a))", + "│ │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, 
stats:pseudo", + "│ └─TableReader(Probe) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -150,8 +157,10 @@ " │ └─IndexLookUp 10.00 root ", " │ ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", " │ └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - " └─IndexReader(Probe) 100.00 root index:IndexRangeScan", - " └─IndexRangeScan 100.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + " └─Limit(Probe) 10.00 root offset:0, count:1", + " └─IndexReader 10.00 root index:Limit", + " └─Limit 10.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index 9ec6d2fdc6b17..92fef60462532 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -5,12 +5,13 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", "Plan": [ - "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 9990.00 root data:Selection", - "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─IndexReader(Probe) 99900.00 root index:IndexRangeScan", - " └─IndexRangeScan 99900.00 cop[tikv] table:t2, 
index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexReader 10000.00 root index:Limit", + " └─Limit 10000.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -23,8 +24,10 @@ "Apply 10000.00 root CARTESIAN anti semi join, left side:TableReader", "├─TableReader(Build) 10000.00 root data:TableFullScan", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─IndexReader(Probe) 100000.00 root index:IndexRangeScan", - " └─IndexRangeScan 100000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexReader 10000.00 root index:Limit", + " └─Limit 10000.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "3 3" @@ -37,8 +40,10 @@ "├─TableReader(Build) 9990.00 root data:Selection", "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─IndexReader(Probe) 99900.00 root index:IndexRangeScan", - " └─IndexRangeScan 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "└─Limit(Probe) 9990.00 root offset:0, count:1", + " └─IndexReader 9990.00 root index:Limit", + " └─Limit 9990.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 9990.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, 
test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -76,12 +81,14 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", "Plan": [ - "HashJoin 7992.00 root CARTESIAN semi join, left side:TableReader, other cond:gt(test.t2.a, test.t1.a)", - "├─IndexReader(Build) 9990.00 root index:IndexFullScan", - "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", - "└─TableReader(Probe) 9990.00 root data:Selection", - " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexReader 10000.00 root index:Limit", + " └─Limit 10000.00 cop[tikv] offset:0, count:1", + " └─Selection 809900.00 cop[tikv] gt(test.t2.a, test.t1.a)", + " └─IndexFullScan 1012375.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" ], "Result": [ "1 1" @@ -90,13 +97,15 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", "Plan": [ - "HashJoin 7984.01 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)], other cond:gt(test.t2.b, test.t1.b)", - "├─TableReader(Build) 9980.01 root data:Selection", - "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", - "└─TableReader(Probe) 9980.01 root data:Selection", - " └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", - " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ 
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Limit(Probe) 10000.00 root offset:0, count:1", + " └─IndexLookUp 10000.00 root ", + " ├─IndexRangeScan(Build) 13375.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " └─Limit(Probe) 10000.00 cop[tikv] offset:0, count:1", + " └─Selection 10000.00 cop[tikv] gt(test.t2.b, test.t1.b)", + " └─TableRowIDScan 13375.00 cop[tikv] table:t2 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -122,19 +131,17 @@ { "SQL": "select * from t1 where a in (select t2.a from t2 inner join t3 on t3.a = t2.b where t3.b > 0)", "Plan": [ - "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 9990.00 root data:Selection", - "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─IndexHashJoin(Probe) 124750.12 root inner join, inner:IndexLookUp, outer key:test.t2.b, inner key:test.t3.a, equal cond:eq(test.t2.b, test.t3.a)", - " ├─IndexLookUp(Build) 99800.10 root ", - " │ ├─IndexRangeScan(Build) 99900.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", - " │ └─Selection(Probe) 99800.10 cop[tikv] not(isnull(test.t2.b))", - " │ └─TableRowIDScan 99900.00 cop[tikv] table:t2 keep order:false, stats:pseudo", - " └─IndexLookUp(Probe) 124750.12 root ", - " ├─Selection(Build) 124750.12 cop[tikv] not(isnull(test.t3.a))", - " │ └─IndexRangeScan 124875.00 cop[tikv] table:t3, index:a(a) range: decided by [eq(test.t3.a, test.t2.b)], keep order:false, stats:pseudo", - " └─TableRowIDScan(Probe) 124750.12 cop[tikv] table:t3 keep order:false, stats:pseudo" + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─HashJoin(Build) 4162.50 root inner join, equal:[eq(test.t3.a, test.t2.b)]", + "│ ├─TableReader(Build) 3330.00 root 
data:Selection", + "│ │ └─Selection 3330.00 cop[tikv] gt(test.t3.b, 0), not(isnull(test.t3.a))", + "│ │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", + "│ └─TableReader(Probe) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -150,8 +157,10 @@ " │ └─IndexLookUp 10.00 root ", " │ ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", " │ └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - " └─IndexReader(Probe) 100.00 root index:IndexRangeScan", - " └─IndexRangeScan 100.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + " └─Limit(Probe) 10.00 root offset:0, count:1", + " └─IndexReader 10.00 root index:Limit", + " └─Limit 10.00 cop[tikv] offset:0, count:1", + " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" ], "Result": [ "1 1", diff --git a/pkg/planner/core/core_init.go b/pkg/planner/core/core_init.go index 0c4888db7af92..4d540a7a4a8ba 100644 --- a/pkg/planner/core/core_init.go +++ b/pkg/planner/core/core_init.go @@ -30,6 +30,7 @@ func init() { // For code refactor init. 
utilfuncp.FindBestTask4BaseLogicalPlan = findBestTask utilfuncp.FindBestTask4LogicalDataSource = findBestTask4LogicalDataSource + utilfuncp.FindBestTask4LogicalJoin = findBestTask4LogicalJoin utilfuncp.ExhaustPhysicalPlans4LogicalJoin = exhaustPhysicalPlans4LogicalJoin utilfuncp.ExhaustPhysicalPlans4LogicalApply = exhaustPhysicalPlans4LogicalApply diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index f5e189c355d72..9a2df7add2fae 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1078,6 +1078,11 @@ func (er *expressionRewriter) handleExistSubquery(ctx context.Context, planCtx * // Add LIMIT 1 when noDecorrelate is true for EXISTS subqueries to enable early exit corCols := coreusage.ExtractCorColumnsBySchema4LogicalPlan(np, planCtx.plan.Schema()) noDecorrelate := isNoDecorrelate(planCtx, corCols, hintFlags, handlingExistsSubquery) + // When EnableCorrelateSubquery is ON, prevent decorrelation of correlated + // subqueries so they stay as Apply with index lookups. + if !noDecorrelate && b.ctx.GetSessionVars().EnableCorrelateSubquery && len(corCols) > 0 { + noDecorrelate = true + } if noDecorrelate { // Only add LIMIT 1 if the query doesn't already contain a LIMIT clause if !hasLimit(np) { @@ -1094,7 +1099,7 @@ func (er *expressionRewriter) handleExistSubquery(ctx context.Context, planCtx * } np = er.popExistsSubPlan(planCtx, np) semiJoinRewrite := hintFlags&hint.HintFlagSemiJoinRewrite > 0 - if semiJoinRewrite && noDecorrelate { + if semiJoinRewrite && hintFlags&hint.HintFlagNoDecorrelate > 0 { b.ctx.GetSessionVars().StmtCtx.SetHintWarning( "NO_DECORRELATE() and SEMI_JOIN_REWRITE() are in conflict. 
Both will be ineffective.") noDecorrelate = false @@ -1280,6 +1285,11 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp collFlag := collate.CompatibleCollate(lt.GetCollate(), rt.GetCollate()) corCols := coreusage.ExtractCorColumnsBySchema4LogicalPlan(np, planCtx.plan.Schema()) noDecorrelate := isNoDecorrelate(planCtx, corCols, hintFlags, handlingInSubquery) + // When EnableCorrelateSubquery is ON, prevent decorrelation of correlated + // IN subqueries so they stay as Apply with index lookups. + if !noDecorrelate && planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery && len(corCols) > 0 && !v.Not { + noDecorrelate = true + } // If it's not the form of `not in (SUBQUERY)`, // and has no correlated column from the current level plan(if the correlated column is from upper level, @@ -1324,6 +1334,13 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp if er.err != nil { return v, true } + // When EnableCorrelateSubquery is ON and the subquery is non-correlated, + // mark the join so that CorrelateSolver converts it to a correlated Apply. + if planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery && len(corCols) == 0 && !v.Not { + if ap, ok := planCtx.plan.(*logicalop.LogicalApply); ok { + ap.PreferCorrelate = true + } + } } er.ctxStackPop(1) diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index be2502b3187f3..8473ef32b3d57 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -3005,6 +3005,102 @@ func ExhaustPhysicalPlans4MockLogicalPlan(p *mockLogicalPlan4Test, prop *propert return append(plan1, plan2...), true, nil } +// findBestTask4LogicalJoin handles LogicalJoin nodes that have a CorrelateAlternative. +// It compares the Join path and the Apply path via CBO and returns the cheaper one. +// If any panic occurs, it falls back to the standard findBestTask behavior. 
+func findBestTask4LogicalJoin(super base.LogicalPlan, prop *property.PhysicalProperty) (bestTask base.Task, retErr error) { + defer func() { + if r := recover(); r != nil { + logutil.BgLogger().Warn("findBestTask4LogicalJoin panic, falling back to join task", + zap.Any("recover", r), + zap.Stack("stack")) + // Disable the correlate alternative so future calls use the standard path. + _, self := getGEAndSelf(super) + join := self.(*logicalop.LogicalJoin) + join.CorrelateAlternative = nil + // Return the cached join task if available (from step 1), otherwise invalid. + // Do NOT retry findBestTask here — the Apply alternative's DeriveStats may + // have corrupted shared AccessPath objects, making a retry unsafe. + p := self.GetBaseLogicalPlan().(*logicalop.BaseLogicalPlan) + if cached := p.GetTask(prop); cached != nil { + bestTask = cached + retErr = nil + } else { + bestTask = base.InvalidTask + retErr = nil + } + } + }() + + _, self := getGEAndSelf(super) + join := self.(*logicalop.LogicalJoin) + p := self.GetBaseLogicalPlan().(*logicalop.BaseLogicalPlan) + + if prop == nil { + return nil, nil + } + + // Cache check: if winner was already computed for this prop, return it. + if cached := p.GetTask(prop); cached != nil { + return cached, nil + } + + // Step 1: Get the Join path's best task (caches on p internally). + joinTask, err := findBestTask(super, prop) + if err != nil { + return nil, err + } + + // Step 2: Try the Apply alternative. The Apply path may encounter issues + // (e.g., unsupported operator types in the inner subtree, correlated + // conditions that confuse the ranger, etc.). Use a recovery mechanism to + // ensure the Join path is always available as a safe fallback. + applyTask := tryCorrelateAlternative(join, prop) + + // Step 3: Compare and cache the winner. 
+ bestTask = joinTask + if applyTask != nil { + if applyIsBetter, err := compareTaskCost(applyTask, joinTask); err != nil { + return nil, err + } else if applyIsBetter { + bestTask = applyTask + } + } + + // Overwrite cache with winner (findBestTask cached joinTask; overwrite if apply won). + p.StoreTask(prop, bestTask) + return bestTask, nil +} + +// tryCorrelateAlternative evaluates the Apply alternative for a LogicalJoin. +// Returns the Apply task on success, or nil if the Apply path fails for any reason. +func tryCorrelateAlternative(join *logicalop.LogicalJoin, prop *property.PhysicalProperty) (result base.Task) { + defer func() { + if r := recover(); r != nil { + logutil.BgLogger().Warn("correlate alternative failed, falling back to join", + zap.Any("recover", r), + zap.Stack("stack")) + result = nil + } + }() + + ap := join.CorrelateAlternative + + // Derive stats — inner child needs fresh stats (cleared by resetStatsForCorrelatedDS). + if _, _, err := ap.RecursiveDeriveStats(nil); err != nil { + return nil + } + preparePossibleProperties(ap) + + // Get the Apply path's best task. + applyTask, err := physicalop.FindBestTask(ap, prop) + if err != nil { + return nil + } + + return applyTask +} + type mockPhysicalPlan4Test struct { physicalop.BasePhysicalPlan // 1 or 2 for physicalPlan1 or physicalPlan2. diff --git a/pkg/planner/core/operator/logicalop/logical_join.go b/pkg/planner/core/operator/logicalop/logical_join.go index 73f50e140284f..4bf632b60d080 100644 --- a/pkg/planner/core/operator/logicalop/logical_join.go +++ b/pkg/planner/core/operator/logicalop/logical_join.go @@ -91,6 +91,15 @@ type LogicalJoin struct { FullSchema *expression.Schema FullNames types.NameSlice + // PreferCorrelate is set to true when this SemiJoin originated from a non-correlated + // IN subquery with EnableCorrelateSubquery=ON, indicating that the CorrelateSolver + // should convert it back to a correlated Apply with index lookups. 
+ PreferCorrelate bool + + // CorrelateAlternative holds the LogicalApply alternative built by CorrelateSolver. + // When non-nil, findBestTask compares both Join and Apply paths and picks the cheaper one. + CorrelateAlternative *LogicalApply + // EqualCondOutCnt indicates the estimated count of joined rows after evaluating `EqualConditions`. EqualCondOutCnt float64 diff --git a/pkg/planner/core/operator/physicalop/base_physical_plan.go b/pkg/planner/core/operator/physicalop/base_physical_plan.go index 4ca5a05e3e4f5..dc11c7f383d95 100644 --- a/pkg/planner/core/operator/physicalop/base_physical_plan.go +++ b/pkg/planner/core/operator/physicalop/base_physical_plan.go @@ -492,6 +492,11 @@ func FindBestTask(e base.LogicalPlan, prop *property.PhysicalProperty) (bestTask return findBestTask4LogicalShowDDLJobs(e, prop) case *logicalop.MockDataSource: return findBestTask4LogicalMockDatasource(lop, prop) + case *logicalop.LogicalJoin: + if lop.CorrelateAlternative != nil { + return utilfuncp.FindBestTask4LogicalJoin(e, prop) + } + return utilfuncp.FindBestTask4BaseLogicalPlan(e, prop) default: return utilfuncp.FindBestTask4BaseLogicalPlan(e, prop) } diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index bc0cdfe69982c..1b319e161b18f 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/planner/core/base" "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" + "github.com/pingcap/tidb/pkg/planner/util" "github.com/pingcap/tidb/pkg/types" ) @@ -30,7 +31,15 @@ import ( type CorrelateSolver struct{} // Optimize implements base.LogicalOptRule.<0th> interface. 
-func (s *CorrelateSolver) Optimize(ctx context.Context, p base.LogicalPlan) (base.LogicalPlan, bool, error) { +func (s *CorrelateSolver) Optimize(ctx context.Context, p base.LogicalPlan) (retPlan base.LogicalPlan, retChanged bool, retErr error) { + defer func() { + if r := recover(); r != nil { + // If correlate panics, return the original plan unchanged. + retPlan = p + retChanged = false + retErr = nil + } + }() return s.correlate(ctx, p) } @@ -58,9 +67,10 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba return p, planChanged, nil } - // Check if this node is a LogicalJoin with a semi-join type. + // Check if this node is a LogicalJoin with a semi-join type that was + // marked for re-correlation (from a non-correlated IN subquery). join, isJoin := p.(*logicalop.LogicalJoin) - if !isJoin || !join.JoinType.IsSemiJoin() { + if !isJoin || !join.JoinType.IsSemiJoin() || !join.PreferCorrelate { return p, planChanged, nil } @@ -94,10 +104,15 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba // Move RightConditions to the selection (they reference only the inner side). selConds = append(selConds, join.RightConditions...) - // Build the LogicalSelection on the inner (right) child. - innerChild := join.Children()[1] + // Clone the inner subtree so PPD can modify the clone without affecting + // the Join's inner child (which must retain its original conditions). + // If the subtree contains an unhandled operator type, abort to avoid corruption. + clonedInner, ok := cloneLogicalSubtree(join.Children()[1]) + if !ok { + return p, planChanged, nil + } sel := logicalop.LogicalSelection{Conditions: selConds}.Init(join.SCtx(), join.QueryBlockOffset()) - sel.SetChildren(innerChild) + sel.SetChildren(clonedInner) // Run predicate push-down on the inner subtree so the new correlated // predicates reach the DataSource (for index access path selection). 
@@ -115,6 +130,17 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba // index access paths were built without them. resetStatsForCorrelatedDS(innerPlan) + // For semi-join semantics (EXISTS/IN and NOT EXISTS/NOT IN), add Limit 1 on + // the inner side. The Apply executor materializes all inner rows per outer + // key via fetchAllInners; a Limit 1 enables early exit since semi/anti-semi + // joins only need to know whether any matching row exists. + // This mirrors what expression_rewriter does for NO_DECORRELATE EXISTS. + if !hasLimit(innerPlan) { + limit := logicalop.LogicalLimit{Count: 1}.Init(join.SCtx(), join.QueryBlockOffset()) + limit.SetChildren(innerPlan) + innerPlan = limit + } + // Build the LogicalApply. ap := logicalop.LogicalApply{}.Init(join.SCtx(), join.QueryBlockOffset()) ap.JoinType = join.JoinType @@ -123,7 +149,10 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba ap.SetSchema(join.Schema().Clone()) ap.SetOutputNames(join.OutputNames()) - return ap, true, nil + // Store the Apply alternative on the Join for cost-based selection during + // physical optimization, rather than unconditionally choosing Apply. + join.CorrelateAlternative = ap + return p, true, nil } // buildCorrelatedCond converts an equal condition from the join into a correlated condition @@ -167,6 +196,160 @@ func (*CorrelateSolver) buildCorrelatedCond( return cond, corCol } +// cloneLogicalSubtree creates a shallow clone of the logical plan subtree, +// ensuring each node has a fresh plan ID and independent mutable state (children, +// conditions, AllConds). Immutable data (table info, column info, etc.) is shared. +// This is used to build the Apply alternative's inner plan without modifying the +// Join's original inner subtree when PPD pushes correlated conditions down. +// Returns (clone, true) on success, or (nil, false) if an unhandled operator type +// is encountered. 
In the failure case, the caller must abort the correlate +// optimization to avoid corrupting the original subtree. +func cloneLogicalSubtree(p base.LogicalPlan) (base.LogicalPlan, bool) { + switch op := p.(type) { + case *logicalop.DataSource: + return cloneDataSource(op), true + case *logicalop.LogicalJoin: + return cloneJoin(op) + case *logicalop.LogicalSelection: + return cloneSelection(op) + case *logicalop.LogicalProjection: + return cloneProjection(op) + case *logicalop.LogicalAggregation: + return cloneAggregation(op) + case *logicalop.LogicalLimit: + return cloneLimit(op) + default: + // Unknown operator type — cannot safely clone. Return failure + // so the caller aborts the correlate optimization. + return nil, false + } +} + +func cloneWithChildren(p base.LogicalPlan) ([]base.LogicalPlan, bool) { + children := make([]base.LogicalPlan, len(p.Children())) + for i, child := range p.Children() { + cloned, ok := cloneLogicalSubtree(child) + if !ok { + return nil, false + } + children[i] = cloned + } + return children, true +} + +func cloneDataSource(ds *logicalop.DataSource) *logicalop.DataSource { + clone := *ds + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + ds.SCtx(), ds.TP(), &clone, ds.QueryBlockOffset()) + clone.SetSchema(ds.Schema().Clone()) + // Independent slices that PPD replaces. + clone.AllConds = append([]expression.Expression(nil), ds.AllConds...) + clone.PushedDownConds = append([]expression.Expression(nil), ds.PushedDownConds...) + // Create fresh AccessPath objects for the clone so that fillIndexPath + // (called during DeriveStats) populates independent copies and does not + // corrupt the original DataSource's paths. Only structural identity + // fields are copied; analysis fields (Ranges, AccessConds, etc.) are + // left at zero so fillIndexPath starts from a clean state. 
+ clone.AllPossibleAccessPaths = make([]*util.AccessPath, len(ds.AllPossibleAccessPaths)) + for i, p := range ds.AllPossibleAccessPaths { + clone.AllPossibleAccessPaths[i] = freshAccessPath(p) + } + // PossibleAccessPaths must reference the same objects as AllPossibleAccessPaths + // so that fillIndexPath modifications during DeriveStats are visible. Without + // this, PossibleAccessPaths retains fresh paths with nil Ranges, causing + // the DataSource to be incorrectly converted to TableDual. + clone.PossibleAccessPaths = append([]*util.AccessPath(nil), clone.AllPossibleAccessPaths...) + return &clone +} + +func cloneJoin(j *logicalop.LogicalJoin) (*logicalop.LogicalJoin, bool) { + children, ok := cloneWithChildren(j) + if !ok { + return nil, false + } + clone := *j + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + j.SCtx(), j.TP(), &clone, j.QueryBlockOffset()) + clone.SetSchema(j.Schema().Clone()) + // Independent condition slices that PPD may modify. + clone.EqualConditions = append([]*expression.ScalarFunction(nil), j.EqualConditions...) + clone.LeftConditions = append(expression.CNFExprs(nil), j.LeftConditions...) + clone.RightConditions = append(expression.CNFExprs(nil), j.RightConditions...) + clone.OtherConditions = append(expression.CNFExprs(nil), j.OtherConditions...) + clone.SetChildren(children...) + return &clone, true +} + +func cloneSelection(s *logicalop.LogicalSelection) (*logicalop.LogicalSelection, bool) { + children, ok := cloneWithChildren(s) + if !ok { + return nil, false + } + clone := *s + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + s.SCtx(), s.TP(), &clone, s.QueryBlockOffset()) + clone.Conditions = append(expression.CNFExprs(nil), s.Conditions...) + clone.SetChildren(children...) 
+ return &clone, true +} + +func cloneProjection(proj *logicalop.LogicalProjection) (*logicalop.LogicalProjection, bool) { + children, ok := cloneWithChildren(proj) + if !ok { + return nil, false + } + clone := *proj + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + proj.SCtx(), proj.TP(), &clone, proj.QueryBlockOffset()) + clone.SetSchema(proj.Schema().Clone()) + clone.SetChildren(children...) + return &clone, true +} + +func cloneAggregation(agg *logicalop.LogicalAggregation) (*logicalop.LogicalAggregation, bool) { + children, ok := cloneWithChildren(agg) + if !ok { + return nil, false + } + clone := *agg + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + agg.SCtx(), agg.TP(), &clone, agg.QueryBlockOffset()) + clone.SetSchema(agg.Schema().Clone()) + clone.SetChildren(children...) + return &clone, true +} + +func cloneLimit(lim *logicalop.LogicalLimit) (*logicalop.LogicalLimit, bool) { + children, ok := cloneWithChildren(lim) + if !ok { + return nil, false + } + clone := *lim + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + lim.SCtx(), lim.TP(), &clone, lim.QueryBlockOffset()) + clone.SetSchema(lim.Schema().Clone()) + clone.SetChildren(children...) + return &clone, true +} + +// freshAccessPath creates a new AccessPath with only the structural identity +// fields from the source path (Index, StoreType, handle flags, hint flags). +// Analysis fields (Ranges, AccessConds, IdxCols, etc.) are left at zero so +// that fillIndexPath / deriveTablePathStats start from a clean state. 
+func freshAccessPath(src *util.AccessPath) *util.AccessPath { + return &util.AccessPath{ + Index: src.Index, + StoreType: src.StoreType, + IsIntHandlePath: src.IsIntHandlePath, + IsCommonHandlePath: src.IsCommonHandlePath, + Forced: src.Forced, + ForceKeepOrder: src.ForceKeepOrder, + ForceNoKeepOrder: src.ForceNoKeepOrder, + ForcePartialOrder: src.ForcePartialOrder, + IsUkShardIndexPath: src.IsUkShardIndexPath, + } +} + // resetStatsForCorrelatedDS walks the inner subtree and clears StatsInfo on // DataSources that have correlated conditions in AllConds, plus all ancestor // plan nodes up to the root. This forces DeriveStats to re-run during physical diff --git a/pkg/planner/util/utilfuncp/func_pointer_misc.go b/pkg/planner/util/utilfuncp/func_pointer_misc.go index 73e4931bb4e4b..35d2258eda0cc 100644 --- a/pkg/planner/util/utilfuncp/func_pointer_misc.go +++ b/pkg/planner/util/utilfuncp/func_pointer_misc.go @@ -42,6 +42,10 @@ var FindBestTask4BaseLogicalPlan func(p base.LogicalPlan, var FindBestTask4LogicalDataSource func(lp base.LogicalPlan, prop *property.PhysicalProperty) (t base.Task, err error) +// FindBestTask4LogicalJoin handles LogicalJoin with a correlate alternative. +var FindBestTask4LogicalJoin func(p base.LogicalPlan, + prop *property.PhysicalProperty) (bestTask base.Task, err error) + // ExhaustPhysicalPlans4LogicalJoin will be called by LogicalJoin in logicalOp pkg. 
var ExhaustPhysicalPlans4LogicalJoin func(lp base.LogicalPlan, prop *property.PhysicalProperty) ( []base.PhysicalPlan, bool, error) From a0631a57525eb124b19578bb43b864121ed4f3a6 Mon Sep 17 00:00:00 2001 From: tpp Date: Fri, 20 Feb 2026 15:44:41 -0800 Subject: [PATCH 07/24] refactor for 2nd customer example --- pkg/bindinfo/binding_auto_test.go | 26 ++++++ .../rule/testdata/correlate_suite_out.json | 30 +++--- .../rule/testdata/correlate_suite_xut.json | 30 +++--- pkg/planner/core/expression_rewriter.go | 32 +++++-- pkg/planner/core/rule_correlate.go | 92 +++++++++++++++---- 5 files changed, 150 insertions(+), 60 deletions(-) diff --git a/pkg/bindinfo/binding_auto_test.go b/pkg/bindinfo/binding_auto_test.go index c150182247463..e982360e81517 100644 --- a/pkg/bindinfo/binding_auto_test.go +++ b/pkg/bindinfo/binding_auto_test.go @@ -16,12 +16,14 @@ package bindinfo_test import ( "fmt" + "slices" "strings" "testing" "github.com/pingcap/tidb/pkg/bindinfo" "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/parser/auth" + "github.com/pingcap/tidb/pkg/sessionctx/vardef" "github.com/pingcap/tidb/pkg/testkit" "github.com/pingcap/tidb/pkg/testkit/testdata" "github.com/stretchr/testify/require" @@ -211,6 +213,30 @@ func TestRelevantOptVarsAndFixes(t *testing.T) { } } +func TestRelevantOptVarsCorrelateSubquery(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec(`create table t1 (a int, b int, key(a))`) + tk.MustExec(`create table t2 (a int, b int, key(a))`) + + p := parser.New() + sql := "select * from t1 where a in (select a from t2)" + + // The variable is recorded as relevant regardless of whether it is ON or OFF, + // because the code path where the variable affects plan choice was reached. 
+ for _, enabled := range []string{"OFF", "ON"} { + tk.MustExec("set tidb_opt_enable_correlate_subquery = " + enabled) + p.Reset() + stmt, err := p.ParseOneStmt(sql, "", "") + require.NoError(t, err) + vars, _, err := bindinfo.RecordRelevantOptVarsAndFixes(tk.Session(), stmt) + require.NoError(t, err) + require.True(t, slices.Contains(vars, vardef.TiDBOptEnableCorrelateSubquery), + "enabled=%s: expected %s in recorded vars %v", enabled, vardef.TiDBOptEnableCorrelateSubquery, vars) + } +} + func TestExplainExploreAnalyze(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index 92fef60462532..2a586822b536d 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -36,14 +36,12 @@ { "SQL": "select * from t1 where a in (select a from t2)", "Plan": [ - "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 9990.00 root data:Selection", - "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 9990.00 root offset:0, count:1", - " └─IndexReader 9990.00 root index:Limit", - " └─Limit 9990.00 cop[tikv] offset:0, count:1", - " └─IndexRangeScan 9990.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep 
order:false, stats:pseudo" ], "Result": [ "1 1", @@ -152,15 +150,13 @@ "SQL": "select * from t1 where a in (select a from t2) order by a limit 10", "Plan": [ "Limit 10.00 root offset:0, count:10", - "└─Apply 10.00 root CARTESIAN semi join, left side:Projection", - " ├─Projection(Build) 10.00 root test.t1.a, test.t1.b", - " │ └─IndexLookUp 10.00 root ", - " │ ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", - " │ └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - " └─Limit(Probe) 10.00 root offset:0, count:1", - " └─IndexReader 10.00 root index:Limit", - " └─Limit 10.00 cop[tikv] offset:0, count:1", - " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "└─MergeJoin 10.00 root semi join, left side:Projection, left key:test.t1.a, right key:test.t2.a", + " ├─IndexReader(Build) 12.50 root index:IndexFullScan", + " │ └─IndexFullScan 12.50 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + " └─Projection(Probe) 12.50 root test.t1.a, test.t1.b", + " └─IndexLookUp 12.50 root ", + " ├─IndexFullScan(Build) 12.50 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", + " └─TableRowIDScan(Probe) 12.50 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index 92fef60462532..2a586822b536d 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -36,14 +36,12 @@ { "SQL": "select * from t1 where a in (select a from t2)", "Plan": [ - "Apply 9990.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 9990.00 root data:Selection", - "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", - "│ └─TableFullScan 10000.00 cop[tikv] 
table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 9990.00 root offset:0, count:1", - " └─IndexReader 9990.00 root index:Limit", - " └─Limit 9990.00 cop[tikv] offset:0, count:1", - " └─IndexRangeScan 9990.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -152,15 +150,13 @@ "SQL": "select * from t1 where a in (select a from t2) order by a limit 10", "Plan": [ "Limit 10.00 root offset:0, count:10", - "└─Apply 10.00 root CARTESIAN semi join, left side:Projection", - " ├─Projection(Build) 10.00 root test.t1.a, test.t1.b", - " │ └─IndexLookUp 10.00 root ", - " │ ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", - " │ └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - " └─Limit(Probe) 10.00 root offset:0, count:1", - " └─IndexReader 10.00 root index:Limit", - " └─Limit 10.00 cop[tikv] offset:0, count:1", - " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "└─MergeJoin 10.00 root semi join, left side:Projection, left key:test.t1.a, right key:test.t2.a", + " ├─IndexReader(Build) 12.50 root index:IndexFullScan", + " │ └─IndexFullScan 12.50 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + " └─Projection(Probe) 12.50 root test.t1.a, test.t1.b", + " └─IndexLookUp 12.50 root ", + " ├─IndexFullScan(Build) 12.50 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", + " 
└─TableRowIDScan(Probe) 12.50 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 9a2df7add2fae..e12d93a8ee1f2 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1080,8 +1080,11 @@ func (er *expressionRewriter) handleExistSubquery(ctx context.Context, planCtx * noDecorrelate := isNoDecorrelate(planCtx, corCols, hintFlags, handlingExistsSubquery) // When EnableCorrelateSubquery is ON, prevent decorrelation of correlated // subqueries so they stay as Apply with index lookups. - if !noDecorrelate && b.ctx.GetSessionVars().EnableCorrelateSubquery && len(corCols) > 0 { - noDecorrelate = true + if !noDecorrelate && len(corCols) > 0 { + b.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableCorrelateSubquery) + if b.ctx.GetSessionVars().EnableCorrelateSubquery { + noDecorrelate = true + } } if noDecorrelate { // Only add LIMIT 1 if the query doesn't already contain a LIMIT clause @@ -1287,8 +1290,11 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp noDecorrelate := isNoDecorrelate(planCtx, corCols, hintFlags, handlingInSubquery) // When EnableCorrelateSubquery is ON, prevent decorrelation of correlated // IN subqueries so they stay as Apply with index lookups. 
- if !noDecorrelate && planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery && len(corCols) > 0 && !v.Not { - noDecorrelate = true + if !noDecorrelate && len(corCols) > 0 && !v.Not { + planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableCorrelateSubquery) + if planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { + noDecorrelate = true + } } // If it's not the form of `not in (SUBQUERY)`, @@ -1297,8 +1303,13 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp // and don't need to append a scalar value, we can rewrite it to inner join. // When EnableCorrelateSubquery is ON, skip the InnerJoin+Agg rewrite so that a SemiJoin is built // instead; the CorrelateSolver rule can then convert it to a correlated Apply with index lookups. - if planCtx.builder.ctx.GetSessionVars().GetAllowInSubqToJoinAndAgg() && !v.Not && !asScalar && len(corCols) == 0 && collFlag && - !planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { + canRewriteToJoinAgg := planCtx.builder.ctx.GetSessionVars().GetAllowInSubqToJoinAndAgg() && !v.Not && !asScalar && len(corCols) == 0 && collFlag + if canRewriteToJoinAgg { + // Record that the correlate variable is relevant — toggling it changes + // whether we take the InnerJoin+Agg path or the SemiApply path. + planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableCorrelateSubquery) + } + if canRewriteToJoinAgg && !planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { // We need to try to eliminate the agg and the projection produced by this operation. planCtx.builder.optFlag |= rule.FlagEliminateAgg planCtx.builder.optFlag |= rule.FlagEliminateProjection @@ -1336,9 +1347,12 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp } // When EnableCorrelateSubquery is ON and the subquery is non-correlated, // mark the join so that CorrelateSolver converts it to a correlated Apply. 
- if planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery && len(corCols) == 0 && !v.Not { - if ap, ok := planCtx.plan.(*logicalop.LogicalApply); ok { - ap.PreferCorrelate = true + if len(corCols) == 0 && !v.Not { + planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableCorrelateSubquery) + if planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { + if ap, ok := planCtx.plan.(*logicalop.LogicalApply); ok { + ap.PreferCorrelate = true + } } } } diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index 1b319e161b18f..0d37aea95b371 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -111,6 +111,16 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba if !ok { return p, planChanged, nil } + + // Lift DataSource conditions back into Selection nodes. The original PPD + // pushed conditions all the way into DataSource.AllConds and cleared them + // from ancestor operators (e.g., Join.RightConditions). When we re-run PPD + // below, the Join re-collects conditions from its own fields (not from + // DataSource.AllConds), so conditions that were pushed past the Join would + // be lost. Wrapping each DataSource in a Selection restores the pre-PPD + // state so the re-run can properly redistribute all conditions. + clonedInner = liftDataSourceConds(clonedInner) + sel := logicalop.LogicalSelection{Conditions: selConds}.Init(join.SCtx(), join.QueryBlockOffset()) sel.SetChildren(clonedInner) @@ -121,7 +131,9 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba // side could only do full scans. _, innerPlan, err := sel.PredicatePushDown(nil) if err != nil { - return nil, false, err + // PPD failed (e.g., conditions reference columns pruned from the + // DataSource schema); abort the correlate optimization. 
+ return p, planChanged, nil } // Reset stats on DataSources that received correlated conditions so DeriveStats @@ -245,20 +257,19 @@ func cloneDataSource(ds *logicalop.DataSource) *logicalop.DataSource { // Independent slices that PPD replaces. clone.AllConds = append([]expression.Expression(nil), ds.AllConds...) clone.PushedDownConds = append([]expression.Expression(nil), ds.PushedDownConds...) - // Create fresh AccessPath objects for the clone so that fillIndexPath - // (called during DeriveStats) populates independent copies and does not - // corrupt the original DataSource's paths. Only structural identity - // fields are copied; analysis fields (Ranges, AccessConds, etc.) are - // left at zero so fillIndexPath starts from a clean state. - clone.AllPossibleAccessPaths = make([]*util.AccessPath, len(ds.AllPossibleAccessPaths)) - for i, p := range ds.AllPossibleAccessPaths { - clone.AllPossibleAccessPaths[i] = freshAccessPath(p) + // Share the original AccessPaths. DeriveStats returns early for DataSources + // with preserved stats (below), so these paths won't be modified. For + // DataSources that receive correlated conditions, resetStatsForCorrelatedDS + // creates fresh AccessPaths and clears stats before DeriveStats re-runs. + clone.AllPossibleAccessPaths = append([]*util.AccessPath(nil), ds.AllPossibleAccessPaths...) + clone.PossibleAccessPaths = append([]*util.AccessPath(nil), ds.PossibleAccessPaths...) + // Preserve original stats so DeriveStats returns early for DataSources + // that don't receive correlated conditions. Without this, DeriveStats + // re-runs fillIndexPath on all DataSources, which fails when conditions + // reference columns that column pruning removed from the schema. + if origStats := ds.StatsInfo(); origStats != nil { + clone.SetStats(origStats) } - // PossibleAccessPaths must reference the same objects as AllPossibleAccessPaths - // so that fillIndexPath modifications during DeriveStats are visible. 
Without - // this, PossibleAccessPaths retains fresh paths with nil Ranges, causing - // the DataSource to be incorrectly converted to TableDual. - clone.PossibleAccessPaths = append([]*util.AccessPath(nil), clone.AllPossibleAccessPaths...) return &clone } @@ -350,13 +361,49 @@ func freshAccessPath(src *util.AccessPath) *util.AccessPath { } } +// liftDataSourceConds walks the plan tree and for each DataSource with +// non-empty AllConds, wraps it in a Selection node containing those conditions. +// This "un-pushes" conditions that the original PPD pushed into DataSources, +// so that a subsequent PPD re-run (in correlate()) can properly redistribute +// all conditions — including those that would otherwise be silently dropped +// when DataSource.PredicatePushDown overwrites AllConds. +func liftDataSourceConds(p base.LogicalPlan) base.LogicalPlan { + // Recurse into children first, potentially replacing them. + for i, child := range p.Children() { + newChild := liftDataSourceConds(child) + if newChild != child { + p.Children()[i] = newChild + } + } + + // If this is a DataSource with AllConds, wrap it in a Selection. + if ds, ok := p.(*logicalop.DataSource); ok && len(ds.AllConds) > 0 { + sel := logicalop.LogicalSelection{ + Conditions: ds.AllConds, + }.Init(ds.SCtx(), ds.QueryBlockOffset()) + sel.SetChildren(ds) + + // Clear DataSource conditions; the PPD re-run will push them back. + ds.AllConds = nil + ds.PushedDownConds = nil + + return sel + } + + return p +} + // resetStatsForCorrelatedDS walks the inner subtree and clears StatsInfo on // DataSources that have correlated conditions in AllConds, plus all ancestor // plan nodes up to the root. This forces DeriveStats to re-run during physical // optimization so that index access paths are rebuilt with the correlated -// conditions. Only DataSources with correlated conditions are reset to avoid -// issues with other DataSources that had their conditions overwritten by the -// second PPD pass. +// conditions. 
+// +// For correlated DataSources, fresh AccessPaths are created so fillIndexPath +// starts from a clean state with the new correlated conditions. Non-correlated +// DataSources retain their original AccessPaths and stats (set during cloning) +// so DeriveStats returns early — this avoids failures when conditions reference +// columns that column pruning removed from the DataSource's schema. func resetStatsForCorrelatedDS(p base.LogicalPlan) bool { hasCorrelated := false @@ -368,6 +415,17 @@ func resetStatsForCorrelatedDS(p base.LogicalPlan) bool { break } } + if hasCorrelated { + // Create fresh AccessPaths so fillIndexPath rebuilds them with the + // correlated conditions. The original paths (shared from the clone) + // must not be modified to avoid corrupting the original DataSource. + origPaths := ds.AllPossibleAccessPaths + ds.AllPossibleAccessPaths = make([]*util.AccessPath, len(origPaths)) + for i, ap := range origPaths { + ds.AllPossibleAccessPaths[i] = freshAccessPath(ap) + } + ds.PossibleAccessPaths = append([]*util.AccessPath(nil), ds.AllPossibleAccessPaths...) + } } // Recurse into children. 
From 2a39db673430fcc61cd26331de5ffca7e468dc4a Mon Sep 17 00:00:00 2001 From: tpp Date: Fri, 20 Feb 2026 21:05:38 -0800 Subject: [PATCH 08/24] increase test coverage --- .../rule/testdata/correlate_suite_in.json | 6 +- .../rule/testdata/correlate_suite_out.json | 69 +++++++++++++++++++ .../rule/testdata/correlate_suite_xut.json | 69 +++++++++++++++++++ 3 files changed, 143 insertions(+), 1 deletion(-) diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json index 38d48f872c1e5..bf60ddbbbc316 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json @@ -11,7 +11,11 @@ "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", "select * from t1 where exists (select /*+ NO_DECORRELATE() */ 1 from t2 where t2.a = t1.a)", "select * from t1 where a in (select t2.a from t2 inner join t3 on t3.a = t2.b where t3.b > 0)", - "select * from t1 where a in (select a from t2) order by a limit 10" + "select * from t1 where a in (select a from t2) order by a limit 10", + "select * from t1 where a in (select a from t2 where b > 1)", + "select * from t1 where a in (select a from t2 group by a)", + "select * from t1 where a in (select a from t2 where b > 1 group by a)", + "select * from t1 where a in (select a from t2 limit 10)" ] } ] diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index 2a586822b536d..3a95b3b7a0ec5 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -162,6 +162,75 @@ "1 1", "2 2" ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 where b > 1)", + "Plan": [ + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + 
"├─TableReader(Build) 3330.00 root data:Selection", + "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 group by a)", + "Plan": [ + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─IndexReader 7992.00 root index:StreamAgg", + "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 where b > 1 group by a)", + "Plan": [ + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─HashAgg(Build) 2664.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─TableReader 2664.00 root data:HashAgg", + "│ └─HashAgg 2664.00 cop[tikv] group by:test.t2.a, ", + "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a 
from t2 limit 10)", + "Plan": [ + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─Selection(Build) 8.00 root not(isnull(test.t2.a))", + "│ └─Limit 10.00 root offset:0, count:10", + "│ └─IndexReader 10.00 root index:Limit", + "│ └─Limit 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] } ] } diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index 2a586822b536d..3a95b3b7a0ec5 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -162,6 +162,75 @@ "1 1", "2 2" ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 where b > 1)", + "Plan": [ + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─TableReader(Build) 3330.00 root data:Selection", + "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 group by a)", + "Plan": [ + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─IndexReader 7992.00 root index:StreamAgg", + "│ └─StreamAgg 
7992.00 cop[tikv] group by:test.t2.a, ", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 where b > 1 group by a)", + "Plan": [ + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─HashAgg(Build) 2664.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─TableReader 2664.00 root data:HashAgg", + "│ └─HashAgg 2664.00 cop[tikv] group by:test.t2.a, ", + "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 limit 10)", + "Plan": [ + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─Selection(Build) 8.00 root not(isnull(test.t2.a))", + "│ └─Limit 10.00 root offset:0, count:10", + "│ └─IndexReader 10.00 root index:Limit", + "│ └─Limit 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] } ] } From c1a3d751881dc7b268d929766ac6a051b7c9698f Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 21 Feb 2026 17:23:56 -0800 Subject: [PATCH 
09/24] new pantheon review comments --- .../core/casetest/rule/rule_correlate_test.go | 34 ++++++++- pkg/planner/core/find_best_task.go | 4 +- pkg/planner/core/rule_correlate.go | 75 +++++++++++++++---- 3 files changed, 97 insertions(+), 16 deletions(-) diff --git a/pkg/planner/core/casetest/rule/rule_correlate_test.go b/pkg/planner/core/casetest/rule/rule_correlate_test.go index 1aee8185b9202..39af182346326 100644 --- a/pkg/planner/core/casetest/rule/rule_correlate_test.go +++ b/pkg/planner/core/casetest/rule/rule_correlate_test.go @@ -1,4 +1,4 @@ -// Copyright 2025 PingCAP, Inc. +// Copyright 2026 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,6 +21,38 @@ import ( "github.com/pingcap/tidb/pkg/testkit/testdata" ) +// TestCorrelateNullSemantics verifies that CorrelateSolver does not break +// 3-valued NULL semantics for scalar IN (LeftOuterSemiJoin). +func TestCorrelateNullSemantics(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set tidb_opt_enable_correlate_subquery = ON") + + // Case 1: non-null outer, null inner → must return NULL (not 0). + tk.MustExec("drop table if exists tn, sn") + tk.MustExec("create table tn(a int)") + tk.MustExec("create table sn(a int, key(a))") + tk.MustExec("insert into tn values (1)") + tk.MustExec("insert into sn values (null)") + tk.MustQuery("select tn.a in (select sn.a from sn) as r from tn").Check(testkit.Rows("")) + + // Case 2: null outer, non-null inner → must return NULL (not 0). + tk.MustExec("truncate table tn") + tk.MustExec("truncate table sn") + tk.MustExec("insert into tn values (null)") + tk.MustExec("insert into sn values (1)") + tk.MustQuery("select tn.a in (select sn.a from sn) as r from tn").Check(testkit.Rows("")) + + // Case 3: both columns NOT NULL → correlate is safe; verify correct results. 
+ tk.MustExec("drop table if exists tnn, snn") + tk.MustExec("create table tnn(a int not null)") + tk.MustExec("create table snn(a int not null, key(a))") + tk.MustExec("insert into tnn values (1), (2), (3)") + tk.MustExec("insert into snn values (1), (2)") + tk.MustQuery("select tnn.a in (select snn.a from snn) as r from tnn order by tnn.a").Check(testkit.Rows("1", "1", "0")) +} + func TestCorrelate(tt *testing.T) { testkit.RunTestUnderCascades(tt, func(t *testing.T, tk *testkit.TestKit, cascades, caller string) { tk.MustExec("use test") diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index 8473ef32b3d57..572ce5bcce9c9 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -3019,8 +3019,8 @@ func findBestTask4LogicalJoin(super base.LogicalPlan, prop *property.PhysicalPro join := self.(*logicalop.LogicalJoin) join.CorrelateAlternative = nil // Return the cached join task if available (from step 1), otherwise invalid. - // Do NOT retry findBestTask here — the Apply alternative's DeriveStats may - // have corrupted shared AccessPath objects, making a retry unsafe. + // Do NOT retry findBestTask here — the Apply alternative may have + // left partial state, making a retry unsafe. p := self.GetBaseLogicalPlan().(*logicalop.BaseLogicalPlan) if cached := p.GetTask(prop); cached != nil { bestTask = cached diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index 0d37aea95b371..e0f5111a28559 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -1,4 +1,4 @@ -// Copyright 2025 PingCAP, Inc. +// Copyright 2026 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,6 +23,8 @@ import ( "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" "github.com/pingcap/tidb/pkg/planner/util" "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/logutil" + "go.uber.org/zap" ) // CorrelateSolver tries to convert semi-join LogicalJoin back to correlated LogicalApply. @@ -34,7 +36,9 @@ type CorrelateSolver struct{} func (s *CorrelateSolver) Optimize(ctx context.Context, p base.LogicalPlan) (retPlan base.LogicalPlan, retChanged bool, retErr error) { defer func() { if r := recover(); r != nil { - // If correlate panics, return the original plan unchanged. + logutil.BgLogger().Warn("CorrelateSolver panic, returning original plan", + zap.Any("recover", r), + zap.Stack("stack")) retPlan = p retChanged = false retErr = nil @@ -87,6 +91,42 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba leftSchema := join.Children()[0].Schema() rightSchema := join.Children()[1].Schema() + // Left outer semi joins (scalar IN / NOT IN) require 3-valued NULL + // semantics: the joiner must distinguish "no match" (→ 0) from "unknown + // due to NULL" (→ NULL). It does this by evaluating the equality join + // condition and tracking whether any comparison returned NULL. + // + // When we push the equality into the inner side as a correlated filter + // (rightCol = CorCol(leftCol)), two problems arise: + // 1. If the inner column is nullable, NULL inner values are silently + // filtered out (NULL = X → NULL → filtered), so the joiner never + // sees them and returns 0 instead of NULL. + // 2. If the outer column is nullable and its value is NULL, the + // correlated filter becomes rightCol = NULL, which filters out all + // inner rows, and the joiner returns 0 instead of NULL. + // + // Skip unless ALL equality columns on both sides are proven NOT NULL. 
+ if join.JoinType == base.LeftOuterSemiJoin || join.JoinType == base.AntiLeftOuterSemiJoin { + for _, eqCond := range join.EqualConditions { + col0, col1, ok := expression.IsColOpCol(eqCond) + if !ok { + return p, planChanged, nil + } + leftCol := leftSchema.RetrieveColumn(col0) + rightCol := rightSchema.RetrieveColumn(col1) + if leftCol == nil || rightCol == nil { + leftCol = leftSchema.RetrieveColumn(col1) + rightCol = rightSchema.RetrieveColumn(col0) + } + if leftCol == nil || rightCol == nil { + return p, planChanged, nil + } + if !mysql.HasNotNullFlag(leftCol.RetType.GetFlag()) || !mysql.HasNotNullFlag(rightCol.RetType.GetFlag()) { + return p, planChanged, nil + } + } + } + selConds := make([]expression.Expression, 0, len(join.EqualConditions)+len(join.RightConditions)) corCols := make([]*expression.CorrelatedColumn, 0, len(join.EqualConditions)) @@ -257,12 +297,18 @@ func cloneDataSource(ds *logicalop.DataSource) *logicalop.DataSource { // Independent slices that PPD replaces. clone.AllConds = append([]expression.Expression(nil), ds.AllConds...) clone.PushedDownConds = append([]expression.Expression(nil), ds.PushedDownConds...) - // Share the original AccessPaths. DeriveStats returns early for DataSources - // with preserved stats (below), so these paths won't be modified. For - // DataSources that receive correlated conditions, resetStatsForCorrelatedDS - // creates fresh AccessPaths and clears stats before DeriveStats re-runs. - clone.AllPossibleAccessPaths = append([]*util.AccessPath(nil), ds.AllPossibleAccessPaths...) - clone.PossibleAccessPaths = append([]*util.AccessPath(nil), ds.PossibleAccessPaths...) + // Deep-clone AccessPaths so the Join and Apply alternatives have fully + // independent path objects. Stats derivation (fillIndexPath, etc.) mutates + // AccessPath fields in place; without deep cloning, costing one alternative + // can corrupt the other and destabilize CBO. 
+ clone.AllPossibleAccessPaths = make([]*util.AccessPath, len(ds.AllPossibleAccessPaths)) + for i, ap := range ds.AllPossibleAccessPaths { + clone.AllPossibleAccessPaths[i] = ap.Clone() + } + clone.PossibleAccessPaths = make([]*util.AccessPath, len(ds.PossibleAccessPaths)) + for i, ap := range ds.PossibleAccessPaths { + clone.PossibleAccessPaths[i] = ap.Clone() + } // Preserve original stats so DeriveStats returns early for DataSources // that don't receive correlated conditions. Without this, DeriveStats // re-runs fillIndexPath on all DataSources, which fails when conditions @@ -287,6 +333,10 @@ func cloneJoin(j *logicalop.LogicalJoin) (*logicalop.LogicalJoin, bool) { clone.LeftConditions = append(expression.CNFExprs(nil), j.LeftConditions...) clone.RightConditions = append(expression.CNFExprs(nil), j.RightConditions...) clone.OtherConditions = append(expression.CNFExprs(nil), j.OtherConditions...) + // Clear correlate state. The alternative was built for the original join's + // children; retaining it would point physical planning at uncloned nodes. + clone.CorrelateAlternative = nil + clone.PreferCorrelate = false clone.SetChildren(children...) return &clone, true } @@ -401,9 +451,9 @@ func liftDataSourceConds(p base.LogicalPlan) base.LogicalPlan { // // For correlated DataSources, fresh AccessPaths are created so fillIndexPath // starts from a clean state with the new correlated conditions. Non-correlated -// DataSources retain their original AccessPaths and stats (set during cloning) -// so DeriveStats returns early — this avoids failures when conditions reference -// columns that column pruning removed from the DataSource's schema. +// DataSources retain their deep-cloned AccessPaths and stats (set during +// cloning) so DeriveStats returns early — this avoids failures when conditions +// reference columns that column pruning removed from the DataSource's schema. 
func resetStatsForCorrelatedDS(p base.LogicalPlan) bool { hasCorrelated := false @@ -417,8 +467,7 @@ func resetStatsForCorrelatedDS(p base.LogicalPlan) bool { } if hasCorrelated { // Create fresh AccessPaths so fillIndexPath rebuilds them with the - // correlated conditions. The original paths (shared from the clone) - // must not be modified to avoid corrupting the original DataSource. + // correlated conditions from a clean state. origPaths := ds.AllPossibleAccessPaths ds.AllPossibleAccessPaths = make([]*util.AccessPath, len(origPaths)) for i, ap := range origPaths { From 88efc6a42eb653dc107384d5272c0e7b07f365b3 Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 21 Feb 2026 18:04:13 -0800 Subject: [PATCH 10/24] update bazel --- pkg/planner/core/casetest/rule/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/planner/core/casetest/rule/BUILD.bazel b/pkg/planner/core/casetest/rule/BUILD.bazel index 1943a51d4993b..ae513a9f45bff 100644 --- a/pkg/planner/core/casetest/rule/BUILD.bazel +++ b/pkg/planner/core/casetest/rule/BUILD.bazel @@ -20,7 +20,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 20, + shard_count = 21, deps = [ "//pkg/config", "//pkg/domain", From 05115687c92e11fa329af8f2cabb72b573131fe2 Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 24 Feb 2026 10:46:15 -0800 Subject: [PATCH 11/24] hint fix --- pkg/planner/core/casetest/rule/BUILD.bazel | 2 +- .../core/casetest/rule/rule_correlate_test.go | 34 +++++++++++++++++++ pkg/planner/core/find_best_task.go | 7 ++++ pkg/planner/core/rule_correlate.go | 6 ++++ 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/pkg/planner/core/casetest/rule/BUILD.bazel b/pkg/planner/core/casetest/rule/BUILD.bazel index ae513a9f45bff..4ff9f16d5ed1b 100644 --- a/pkg/planner/core/casetest/rule/BUILD.bazel +++ b/pkg/planner/core/casetest/rule/BUILD.bazel @@ -20,7 +20,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 21, + shard_count = 22, deps 
= [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/core/casetest/rule/rule_correlate_test.go b/pkg/planner/core/casetest/rule/rule_correlate_test.go index 39af182346326..24be2c667ed5e 100644 --- a/pkg/planner/core/casetest/rule/rule_correlate_test.go +++ b/pkg/planner/core/casetest/rule/rule_correlate_test.go @@ -15,10 +15,12 @@ package rule import ( + "strings" "testing" "github.com/pingcap/tidb/pkg/testkit" "github.com/pingcap/tidb/pkg/testkit/testdata" + "github.com/stretchr/testify/require" ) // TestCorrelateNullSemantics verifies that CorrelateSolver does not break @@ -53,6 +55,38 @@ func TestCorrelateNullSemantics(t *testing.T) { tk.MustQuery("select tnn.a in (select snn.a from snn) as r from tnn order by tnn.a").Check(testkit.Rows("1", "1", "0")) } +// TestCorrelatePreservesHints verifies that when the CorrelateSolver builds an +// Apply alternative, user-specified join hints (e.g., HASH_JOIN) are preserved +// and respected during physical plan selection. +func TestCorrelatePreservesHints(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set tidb_opt_enable_correlate_subquery = ON") + + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1 (a int not null, b int, key(a))") + tk.MustExec("create table t2 (a int not null, b int, key(a))") + tk.MustExec("insert into t1 values (1,1),(2,2),(3,3)") + tk.MustExec("insert into t2 values (1,10),(2,20)") + + // With HASH_JOIN hint, the plan should use HashJoin even when the correlate + // optimization is enabled and could produce an Apply alternative. 
+ rows := tk.MustQuery("explain format = 'brief' select /*+ HASH_JOIN(t1, t2) */ * from t1 where a in (select a from t2)").Rows() + hasHashJoin := false + for _, row := range rows { + if strings.Contains(row[0].(string), "HashJoin") { + hasHashJoin = true + break + } + } + require.True(t, hasHashJoin, "HASH_JOIN hint should be preserved when correlate optimization is enabled") + + // Verify the same query produces correct results. + tk.MustQuery("select /*+ HASH_JOIN(t1, t2) */ * from t1 where a in (select a from t2) order by t1.a"). + Check(testkit.Rows("1 1", "2 2")) +} + func TestCorrelate(tt *testing.T) { testkit.RunTestUnderCascades(tt, func(t *testing.T, tk *testkit.TestKit, cascades, caller string) { tk.MustExec("use test") diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index 572ce5bcce9c9..fb514a15b15a5 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -3051,6 +3051,13 @@ func findBestTask4LogicalJoin(super base.LogicalPlan, prop *property.PhysicalPro return nil, err } + // If the user specified explicit join hints (e.g., HASH_JOIN, INL_JOIN), + // respect them and skip the Apply alternative. Hints are a user override + // that should not be silently overridden by the correlate optimization. + if join.PreferJoinType > 0 { + return joinTask, nil + } + // Step 2: Try the Apply alternative. The Apply path may encounter issues // (e.g., unsupported operator types in the inner subtree, correlated // conditions that confuse the ranger, etc.). 
Use a recovery mechanism to diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index e0f5111a28559..0c8d2bcc638bd 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -197,6 +197,12 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba ap := logicalop.LogicalApply{}.Init(join.SCtx(), join.QueryBlockOffset()) ap.JoinType = join.JoinType ap.CorCols = corCols + // Copy hint fields so hint behavior is preserved in the alternative. + ap.HintInfo = join.HintInfo + ap.PreferJoinType = join.PreferJoinType + ap.PreferJoinOrder = join.PreferJoinOrder + ap.LeftPreferJoinType = join.LeftPreferJoinType + ap.RightPreferJoinType = join.RightPreferJoinType ap.SetChildren(join.Children()[0], innerPlan) ap.SetSchema(join.Schema().Clone()) ap.SetOutputNames(join.OutputNames()) From a807f60adfe01cfee62b56bc5f0b0fbd33b87489 Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 24 Feb 2026 11:16:39 -0800 Subject: [PATCH 12/24] pushdown hint fix --- pkg/planner/core/rule_correlate.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index 0c8d2bcc638bd..b21e2bae747c4 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -405,15 +405,17 @@ func cloneLimit(lim *logicalop.LogicalLimit) (*logicalop.LogicalLimit, bool) { // that fillIndexPath / deriveTablePathStats start from a clean state. 
func freshAccessPath(src *util.AccessPath) *util.AccessPath { return &util.AccessPath{ - Index: src.Index, - StoreType: src.StoreType, - IsIntHandlePath: src.IsIntHandlePath, - IsCommonHandlePath: src.IsCommonHandlePath, - Forced: src.Forced, - ForceKeepOrder: src.ForceKeepOrder, - ForceNoKeepOrder: src.ForceNoKeepOrder, - ForcePartialOrder: src.ForcePartialOrder, - IsUkShardIndexPath: src.IsUkShardIndexPath, + Index: src.Index, + StoreType: src.StoreType, + IsIntHandlePath: src.IsIntHandlePath, + IsCommonHandlePath: src.IsCommonHandlePath, + Forced: src.Forced, + ForceKeepOrder: src.ForceKeepOrder, + ForceNoKeepOrder: src.ForceNoKeepOrder, + ForcePartialOrder: src.ForcePartialOrder, + IsUkShardIndexPath: src.IsUkShardIndexPath, + IndexLookUpPushDownBy: src.IndexLookUpPushDownBy, + NoncacheableReason: src.NoncacheableReason, } } From 155437a4fb2298f6b3cea79d3a2db9a5dd704445 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 6 Apr 2026 15:18:06 -0700 Subject: [PATCH 13/24] refactor after alternative plan PR implemented --- pkg/bindinfo/binding_auto_test.go | 10 +- pkg/planner/core/casetest/rule/BUILD.bazel | 4 +- .../core/casetest/rule/rule_correlate_test.go | 88 +++-- .../rule/testdata/correlate_suite_in.json | 18 +- .../rule/testdata/correlate_suite_out.json | 313 ++++++++++++++---- .../rule/testdata/correlate_suite_xut.json | 313 ++++++++++++++---- pkg/planner/core/core_init.go | 1 - pkg/planner/core/expression_rewriter.go | 34 +- pkg/planner/core/find_best_task.go | 103 ------ .../core/operator/logicalop/logical_join.go | 6 +- .../operator/physicalop/base_physical_plan.go | 5 - pkg/planner/core/optimizer.go | 5 +- pkg/planner/core/rule_correlate.go | 13 +- pkg/planner/optimize.go | 47 ++- .../util/utilfuncp/func_pointer_misc.go | 4 - pkg/sessionctx/stmtctx/stmtctx.go | 12 + pkg/sessionctx/vardef/tidb_vars.go | 5 - pkg/sessionctx/variable/session.go | 5 +- pkg/sessionctx/variable/sysvar.go | 4 - 19 files changed, 674 insertions(+), 316 deletions(-) diff --git 
a/pkg/bindinfo/binding_auto_test.go b/pkg/bindinfo/binding_auto_test.go index 898d13dd57960..4fc616c268df9 100644 --- a/pkg/bindinfo/binding_auto_test.go +++ b/pkg/bindinfo/binding_auto_test.go @@ -242,17 +242,17 @@ func TestRelevantOptVarsCorrelateSubquery(t *testing.T) { p := parser.New() sql := "select * from t1 where a in (select a from t2)" - // The variable is recorded as relevant regardless of whether it is ON or OFF, - // because the code path where the variable affects plan choice was reached. + // The alternative logical plans variable is recorded as relevant because the + // code path where it affects plan choice (correlate-to-Apply) was reached. for _, enabled := range []string{"OFF", "ON"} { - tk.MustExec("set tidb_opt_enable_correlate_subquery = " + enabled) + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = " + enabled) p.Reset() stmt, err := p.ParseOneStmt(sql, "", "") require.NoError(t, err) vars, _, err := bindinfo.RecordRelevantOptVarsAndFixes(tk.Session(), stmt) require.NoError(t, err) - require.True(t, slices.Contains(vars, vardef.TiDBOptEnableCorrelateSubquery), - "enabled=%s: expected %s in recorded vars %v", enabled, vardef.TiDBOptEnableCorrelateSubquery, vars) + require.True(t, slices.Contains(vars, vardef.TiDBOptEnableAlternativeLogicalPlans), + "enabled=%s: expected %s in recorded vars %v", enabled, vardef.TiDBOptEnableAlternativeLogicalPlans, vars) } } diff --git a/pkg/planner/core/casetest/rule/BUILD.bazel b/pkg/planner/core/casetest/rule/BUILD.bazel index 22a52124542ec..b501535d61803 100644 --- a/pkg/planner/core/casetest/rule/BUILD.bazel +++ b/pkg/planner/core/casetest/rule/BUILD.bazel @@ -7,8 +7,8 @@ go_test( "dual_test.go", "main_test.go", "rule_cdc_join_reorder_test.go", - "rule_correlate_test.go", "rule_common_handle_ordering_test.go", + "rule_correlate_test.go", "rule_derive_topn_from_window_test.go", "rule_eliminate_empty_selection_test.go", "rule_eliminate_projection_test.go", @@ -21,7 +21,7 @@ go_test( ], data = 
glob(["testdata/**"]), flaky = True, - shard_count = 30, + shard_count = 34, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/core/casetest/rule/rule_correlate_test.go b/pkg/planner/core/casetest/rule/rule_correlate_test.go index 24be2c667ed5e..c027b91210eb5 100644 --- a/pkg/planner/core/casetest/rule/rule_correlate_test.go +++ b/pkg/planner/core/casetest/rule/rule_correlate_test.go @@ -29,7 +29,7 @@ func TestCorrelateNullSemantics(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") - tk.MustExec("set tidb_opt_enable_correlate_subquery = ON") + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = ON") // Case 1: non-null outer, null inner → must return NULL (not 0). tk.MustExec("drop table if exists tn, sn") @@ -55,14 +55,16 @@ func TestCorrelateNullSemantics(t *testing.T) { tk.MustQuery("select tnn.a in (select snn.a from snn) as r from tnn order by tnn.a").Check(testkit.Rows("1", "1", "0")) } -// TestCorrelatePreservesHints verifies that when the CorrelateSolver builds an -// Apply alternative, user-specified join hints (e.g., HASH_JOIN) are preserved -// and respected during physical plan selection. -func TestCorrelatePreservesHints(t *testing.T) { +// TestCorrelateAlternativeChoosesApply verifies that the correlate alternative +// round produces an Apply plan that wins the cost comparison for a non-correlated +// IN subquery when an outer WHERE predicate reduces the estimated row count. +// Without alternative plans, the InnerJoin+Agg rewrite produces IndexJoin+StreamAgg. +// With alternative plans, the correlate round produces Apply+Limit which is cheaper +// (avoids the StreamAgg overhead and uses Limit 1 for early exit on the inner side). 
+func TestCorrelateAlternativeChoosesApply(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") - tk.MustExec("set tidb_opt_enable_correlate_subquery = ON") tk.MustExec("drop table if exists t1, t2") tk.MustExec("create table t1 (a int not null, b int, key(a))") @@ -70,21 +72,25 @@ func TestCorrelatePreservesHints(t *testing.T) { tk.MustExec("insert into t1 values (1,1),(2,2),(3,3)") tk.MustExec("insert into t2 values (1,10),(2,20)") - // With HASH_JOIN hint, the plan should use HashJoin even when the correlate - // optimization is enabled and could produce an Apply alternative. - rows := tk.MustQuery("explain format = 'brief' select /*+ HASH_JOIN(t1, t2) */ * from t1 where a in (select a from t2)").Rows() - hasHashJoin := false - for _, row := range rows { - if strings.Contains(row[0].(string), "HashJoin") { - hasHashJoin = true - break - } - } - require.True(t, hasHashJoin, "HASH_JOIN hint should be preserved when correlate optimization is enabled") + sql := "select * from t1 where b = 1 and a in (select a from t2)" + + // Without alternative plans: standard InnerJoin+Agg path produces IndexJoin. + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = OFF") + rows := tk.MustQuery("explain format = 'brief' " + sql).Rows() + require.True(t, strings.Contains(rows[0][0].(string), "IndexJoin"), + "without alternative plans, expected IndexJoin, got: %s", rows[0][0]) + + // With alternative plans: correlate round produces Apply (cheaper than IndexJoin+StreamAgg). + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = ON") + rows = tk.MustQuery("explain format = 'brief' " + sql).Rows() + require.True(t, strings.Contains(rows[0][0].(string), "Apply"), + "with alternative plans, expected Apply, got: %s", rows[0][0]) - // Verify the same query produces correct results. - tk.MustQuery("select /*+ HASH_JOIN(t1, t2) */ * from t1 where a in (select a from t2) order by t1.a"). 
- Check(testkit.Rows("1 1", "2 2")) + // Verify correct results in both modes. + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = OFF") + tk.MustQuery(sql).Check(testkit.Rows("1 1")) + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = ON") + tk.MustQuery(sql).Check(testkit.Rows("1 1")) } func TestCorrelate(tt *testing.T) { @@ -99,7 +105,47 @@ func TestCorrelate(tt *testing.T) { tk.MustExec("insert into t3 values (10,1),(20,2)") // Enable the correlate rule. - tk.MustExec("set tidb_opt_enable_correlate_subquery = ON") + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = ON") + + var input []string + var output []struct { + SQL string + Plan []string + Result []string + } + suite := GetCorrelateSuiteData() + suite.LoadTestCases(t, &input, &output, cascades, caller) + for i, sql := range input { + testdata.OnRecord(func() { + output[i].SQL = sql + output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + sql).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows()) + }) + tk.MustQuery("explain format = 'brief' " + sql).Check(testkit.Rows(output[i].Plan...)) + tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...)) + } + }) +} + +// TestCorrelateWithCostFactors verifies that when hash/merge join cost factors +// are increased, the correlate alternative round wins and produces Apply-based +// plans with correlated index access for cases that normally choose HashJoin. 
+func TestCorrelateWithCostFactors(tt *testing.T) { + testkit.RunTestUnderCascades(tt, func(t *testing.T, tk *testkit.TestKit, cascades, caller string) { + tk.MustExec("use test") + tk.MustExec("drop table if exists t1, t2, t3") + tk.MustExec("create table t1 (a int, b int, key(a))") + tk.MustExec("create table t2 (a int, b int, key(a))") + tk.MustExec("create table t3 (a int, b int, key(a))") + tk.MustExec("insert into t1 values (1,1),(2,2),(3,3)") + tk.MustExec("insert into t2 values (1,10),(2,20)") + tk.MustExec("insert into t3 values (10,1),(20,2)") + + // Enable the correlate rule and penalize hash/merge joins so the + // correlate alternative (Apply with index lookup) wins the cost comparison. + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = ON") + tk.MustExec("set tidb_opt_hash_join_cost_factor = 1000") + tk.MustExec("set tidb_opt_merge_join_cost_factor = 1000") var input []string var output []struct { diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json index bf60ddbbbc316..d974904bc062b 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json @@ -15,7 +15,23 @@ "select * from t1 where a in (select a from t2 where b > 1)", "select * from t1 where a in (select a from t2 group by a)", "select * from t1 where a in (select a from t2 where b > 1 group by a)", - "select * from t1 where a in (select a from t2 limit 10)" + "select * from t1 where a in (select a from t2 limit 10)", + "select * from t1 where b = 1 and a in (select a from t2)", + "select * from t1 where b = 1 and exists (select 1 from t2 where t2.a = t1.a) limit 1", + "select * from t1 where b = 1 and a not in (select a from t2) limit 1", + "select * from t1 where b = 1 and a in (select a from t2 where t2.b > 0) limit 1" + ] + }, + { + "name": "TestCorrelateWithCostFactors", + "cases": [ + "select * from t1 
where exists (select 1 from t2 where t2.a = t1.a)", + "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)", + "select * from t1 where a in (select a from t2)", + "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", + "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", + "select * from t1 where a in (select a from t2) order by a limit 10", + "select * from t1 where a in (select a from t2 where b > 1)" ] } ] diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index 3a95b3b7a0ec5..c9be9de330595 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -5,13 +5,12 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", "Plan": [ - "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 10000.00 root data:TableFullScan", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 10000.00 root offset:0, count:1", - " └─IndexReader 10000.00 root index:Limit", - " └─Limit 10000.00 cop[tikv] offset:0, count:1", - " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -21,13 +20,11 @@ { "SQL": "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)", "Plan": [ - "Apply 
10000.00 root CARTESIAN anti semi join, left side:TableReader", - "├─TableReader(Build) 10000.00 root data:TableFullScan", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 10000.00 root offset:0, count:1", - " └─IndexReader 10000.00 root index:Limit", - " └─Limit 10000.00 cop[tikv] offset:0, count:1", - " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "HashJoin 8000.00 root anti semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─IndexReader(Build) 10000.00 root index:IndexFullScan", + "│ └─IndexFullScan 10000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "3 3" @@ -36,9 +33,11 @@ { "SQL": "select * from t1 where a in (select a from t2)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", - "├─IndexReader(Build) 9990.00 root index:IndexFullScan", - "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "HashJoin 9990.00 root inner join, equal:[eq(test.t1.a, test.t2.a)]", + "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─IndexReader 7992.00 root index:StreamAgg", + "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", "└─TableReader(Probe) 9990.00 root data:Selection", " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" @@ -79,14 +78,12 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", "Plan": [ - "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 
10000.00 root data:TableFullScan", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 10000.00 root offset:0, count:1", - " └─IndexReader 10000.00 root index:Limit", - " └─Limit 10000.00 cop[tikv] offset:0, count:1", - " └─Selection 809900.00 cop[tikv] gt(test.t2.a, test.t1.a)", - " └─IndexFullScan 1012375.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + "HashJoin 7992.00 root CARTESIAN semi join, left side:TableReader, other cond:gt(test.t2.a, test.t1.a)", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1" @@ -95,15 +92,13 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", "Plan": [ - "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 10000.00 root data:TableFullScan", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 10000.00 root offset:0, count:1", - " └─IndexLookUp 10000.00 root ", - " ├─IndexRangeScan(Build) 13375.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", - " └─Limit(Probe) 10000.00 cop[tikv] offset:0, count:1", - " └─Selection 10000.00 cop[tikv] gt(test.t2.b, test.t1.b)", - " └─TableRowIDScan 13375.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + "HashJoin 7984.01 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)], other cond:gt(test.t2.b, test.t1.b)", + "├─TableReader(Build) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep 
order:false, stats:pseudo", + "└─TableReader(Probe) 9980.01 root data:Selection", + " └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -113,7 +108,7 @@ { "SQL": "select * from t1 where exists (select /*+ NO_DECORRELATE() */ 1 from t2 where t2.a = t1.a)", "Plan": [ - "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", + "Apply 8000.00 root CARTESIAN semi join, left side:TableReader", "├─TableReader(Build) 10000.00 root data:TableFullScan", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", "└─Limit(Probe) 10000.00 root offset:0, count:1", @@ -129,14 +124,15 @@ { "SQL": "select * from t1 where a in (select t2.a from t2 inner join t3 on t3.a = t2.b where t3.b > 0)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", - "├─HashJoin(Build) 4162.50 root inner join, equal:[eq(test.t3.a, test.t2.b)]", - "│ ├─TableReader(Build) 3330.00 root data:Selection", - "│ │ └─Selection 3330.00 cop[tikv] gt(test.t3.b, 0), not(isnull(test.t3.a))", - "│ │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", - "│ └─TableReader(Probe) 9980.01 root data:Selection", - "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "HashJoin 5203.12 root inner join, equal:[eq(test.t1.a, test.t2.a)]", + "├─HashAgg(Build) 4162.50 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─HashJoin 4162.50 root inner join, equal:[eq(test.t3.a, test.t2.b)]", + "│ ├─TableReader(Build) 3330.00 root data:Selection", + "│ │ └─Selection 3330.00 cop[tikv] gt(test.t3.b, 0), not(isnull(test.t3.a))", + "│ │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", + "│ └─TableReader(Probe) 9980.01 root data:Selection", + "│ 
└─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", "└─TableReader(Probe) 9990.00 root data:Selection", " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" @@ -150,13 +146,15 @@ "SQL": "select * from t1 where a in (select a from t2) order by a limit 10", "Plan": [ "Limit 10.00 root offset:0, count:10", - "└─MergeJoin 10.00 root semi join, left side:Projection, left key:test.t1.a, right key:test.t2.a", - " ├─IndexReader(Build) 12.50 root index:IndexFullScan", - " │ └─IndexFullScan 12.50 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", - " └─Projection(Probe) 12.50 root test.t1.a, test.t1.b", - " └─IndexLookUp 12.50 root ", - " ├─IndexFullScan(Build) 12.50 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", - " └─TableRowIDScan(Probe) 12.50 cop[tikv] table:t1 keep order:false, stats:pseudo" + "└─MergeJoin 10.00 root inner join, left key:test.t1.a, right key:test.t2.a", + " ├─StreamAgg(Build) 8.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + " │ └─IndexReader 8.00 root index:StreamAgg", + " │ └─StreamAgg 8.00 cop[tikv] group by:test.t2.a, ", + " │ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + " └─Projection(Probe) 10.00 root test.t1.a, test.t1.b", + " └─IndexLookUp 10.00 root ", + " ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -166,10 +164,12 @@ { "SQL": "select * from t1 where a in (select a from t2 where b > 1)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", - "├─TableReader(Build) 3330.00 root data:Selection", - "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), 
not(isnull(test.t2.a))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "HashJoin 3330.00 root inner join, equal:[eq(test.t2.a, test.t1.a)]", + "├─HashAgg(Build) 2664.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─TableReader 2664.00 root data:HashAgg", + "│ └─HashAgg 2664.00 cop[tikv] group by:test.t2.a, ", + "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", "└─TableReader(Probe) 9990.00 root data:Selection", " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" @@ -182,7 +182,7 @@ { "SQL": "select * from t1 where a in (select a from t2 group by a)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "HashJoin 9990.00 root inner join, equal:[eq(test.t1.a, test.t2.a)]", "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", "│ └─IndexReader 7992.00 root index:StreamAgg", "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", @@ -199,7 +199,7 @@ { "SQL": "select * from t1 where a in (select a from t2 where b > 1 group by a)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "HashJoin 3330.00 root inner join, equal:[eq(test.t2.a, test.t1.a)]", "├─HashAgg(Build) 2664.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", "│ └─TableReader 2664.00 root data:HashAgg", "│ └─HashAgg 2664.00 cop[tikv] group by:test.t2.a, ", @@ -217,16 +217,205 @@ { "SQL": "select * from t1 where a in (select a from t2 limit 10)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", - "├─Selection(Build) 8.00 root not(isnull(test.t2.a))", - "│ └─Limit 10.00 root offset:0, count:10", - "│ └─IndexReader 10.00 root index:Limit", - "│ 
└─Limit 10.00 cop[tikv] offset:0, count:10", - "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "IndexHashJoin 10.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─HashAgg(Build) 8.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─Selection 8.00 root not(isnull(test.t2.a))", + "│ └─Limit 10.00 root offset:0, count:10", + "│ └─IndexReader 10.00 root index:Limit", + "│ └─Limit 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 10.00 root ", + " ├─Selection(Build) 10.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 10.01 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where b = 1 and a in (select a from t2)", + "Plan": [ + "IndexJoin 9.99 root inner join, inner:StreamAgg, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + "├─TableReader(Build) 9.99 root data:Selection", + "│ └─Selection 9.99 cop[tikv] eq(test.t1.b, 1), not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─StreamAgg(Probe) 9.99 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + " └─IndexReader 9.99 root index:Selection", + " └─Selection 9.99 cop[tikv] not(isnull(test.t2.a))", + " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:true, stats:pseudo" + ], + "Result": [ + "1 1" + ] + }, + { + "SQL": "select * from t1 where b = 1 and exists (select 1 from t2 where t2.a = t1.a) limit 1", + "Plan": [ + "Limit 1.00 root offset:0, count:1", + "└─IndexHashJoin 1.00 root semi join, 
inner:IndexReader, left side:TableReader, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + " ├─TableReader(Build) 1.25 root data:Selection", + " │ └─Selection 1.25 cop[tikv] eq(test.t1.b, 1), not(isnull(test.t1.a))", + " │ └─TableFullScan 1251.25 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─IndexReader(Probe) 1.56 root index:Selection", + " └─Selection 1.56 cop[tikv] not(isnull(test.t2.a))", + " └─IndexRangeScan 1.56 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "1 1" + ] + }, + { + "SQL": "select * from t1 where b = 1 and a not in (select a from t2) limit 1", + "Plan": [ + "Limit 1.00 root offset:0, count:1", + "└─HashJoin 1.00 root Null-aware anti semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + " ├─IndexReader(Build) 10000.00 root index:IndexFullScan", + " │ └─IndexFullScan 10000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + " └─TableReader(Probe) 1.25 root data:Selection", + " └─Selection 1.25 cop[tikv] eq(test.t1.b, 1)", + " └─TableFullScan 1250.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": null + }, + { + "SQL": "select * from t1 where b = 1 and a in (select a from t2 where t2.b > 0) limit 1", + "Plan": [ + "Limit 1.00 root offset:0, count:1", + "└─IndexJoin 1.00 root inner join, inner:StreamAgg, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + " ├─TableReader(Build) 1.00 root data:Selection", + " │ └─Selection 1.00 cop[tikv] eq(test.t1.b, 1), not(isnull(test.t1.a))", + " │ └─TableFullScan 1001.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─StreamAgg(Probe) 1.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + " └─Projection 1.00 root test.t2.a, test.t2.b", + " └─IndexLookUp 1.00 root ", + " ├─Selection(Build) 3.00 cop[tikv] not(isnull(test.t2.a))", + " │ └─IndexRangeScan 3.00 cop[tikv] table:t2, 
index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:true, stats:pseudo", + " └─Selection(Probe) 1.00 cop[tikv] gt(test.t2.b, 0)", + " └─TableRowIDScan 3.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1" + ] + } + ] + }, + { + "Name": "TestCorrelateWithCostFactors", + "Cases": [ + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", + "Plan": [ + "IndexHashJoin 7992.00 root semi join, inner:IndexReader, left side:TableReader, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexReader(Probe) 12487.50 root index:Selection", + " └─Selection 12487.50 cop[tikv] not(isnull(test.t2.a))", + " └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)", + "Plan": [ + "IndexHashJoin 8000.00 root anti semi join, inner:IndexReader, left side:TableReader, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexReader(Probe) 12500.00 root index:IndexRangeScan", + " └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "3 3" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2)", + "Plan": [ + "IndexHashJoin 9990.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, 
funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─IndexReader 7992.00 root index:StreamAgg", + "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─IndexLookUp(Probe) 9990.00 root ", + " ├─Selection(Build) 9990.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 10000.00 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 9990.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", + "Plan": [ + "HashJoin 7992.00 root CARTESIAN semi join, left side:TableReader, other cond:gt(test.t2.a, test.t1.a)", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", "└─TableReader(Probe) 9990.00 root data:Selection", " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], + "Result": [ + "1 1" + ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", + "Plan": [ + "IndexHashJoin 7984.01 root semi join, inner:IndexLookUp, left side:TableReader, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a), other cond:gt(test.t2.b, test.t1.b)", + "├─TableReader(Build) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 12475.01 root ", + " ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))", + " │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " └─Selection(Probe) 
12475.01 cop[tikv] not(isnull(test.t2.b))", + " └─TableRowIDScan 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2) order by a limit 10", + "Plan": [ + "Limit 10.00 root offset:0, count:10", + "└─IndexJoin 10.00 root inner join, inner:StreamAgg, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + " ├─Projection(Build) 10.00 root test.t1.a, test.t1.b", + " │ └─IndexLookUp 10.00 root ", + " │ ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", + " │ └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─StreamAgg(Probe) 10.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + " └─IndexReader 10.00 root index:Selection", + " └─Selection 10.00 cop[tikv] not(isnull(test.t2.a))", + " └─IndexRangeScan 10.01 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:true, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 where b > 1)", + "Plan": [ + "IndexHashJoin 3330.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─HashAgg(Build) 2664.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─TableReader 2664.00 root data:HashAgg", + "│ └─HashAgg 2664.00 cop[tikv] group by:test.t2.a, ", + "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 3330.00 root ", + " ├─Selection(Build) 3330.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 3330.00 cop[tikv] table:t1 keep order:false, 
stats:pseudo" + ], "Result": [ "1 1", "2 2" diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index 3a95b3b7a0ec5..c9be9de330595 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -5,13 +5,12 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", "Plan": [ - "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 10000.00 root data:TableFullScan", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 10000.00 root offset:0, count:1", - " └─IndexReader 10000.00 root index:Limit", - " └─Limit 10000.00 cop[tikv] offset:0, count:1", - " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -21,13 +20,11 @@ { "SQL": "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)", "Plan": [ - "Apply 10000.00 root CARTESIAN anti semi join, left side:TableReader", - "├─TableReader(Build) 10000.00 root data:TableFullScan", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 10000.00 root offset:0, count:1", - " └─IndexReader 10000.00 root index:Limit", - " └─Limit 10000.00 cop[tikv] offset:0, count:1", - " └─IndexRangeScan 10000.00 cop[tikv] table:t2, index:a(a) range: decided by 
[eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + "HashJoin 8000.00 root anti semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "├─IndexReader(Build) 10000.00 root index:IndexFullScan", + "│ └─IndexFullScan 10000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "3 3" @@ -36,9 +33,11 @@ { "SQL": "select * from t1 where a in (select a from t2)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", - "├─IndexReader(Build) 9990.00 root index:IndexFullScan", - "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "HashJoin 9990.00 root inner join, equal:[eq(test.t1.a, test.t2.a)]", + "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─IndexReader 7992.00 root index:StreamAgg", + "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", "└─TableReader(Probe) 9990.00 root data:Selection", " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" @@ -79,14 +78,12 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", "Plan": [ - "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 10000.00 root data:TableFullScan", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 10000.00 root offset:0, count:1", - " └─IndexReader 10000.00 root index:Limit", - " └─Limit 10000.00 cop[tikv] offset:0, count:1", - " └─Selection 809900.00 cop[tikv] gt(test.t2.a, test.t1.a)", - " └─IndexFullScan 1012375.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo" + "HashJoin 
7992.00 root CARTESIAN semi join, left side:TableReader, other cond:gt(test.t2.a, test.t1.a)", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1" @@ -95,15 +92,13 @@ { "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", "Plan": [ - "Apply 10000.00 root CARTESIAN semi join, left side:TableReader", - "├─TableReader(Build) 10000.00 root data:TableFullScan", - "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "└─Limit(Probe) 10000.00 root offset:0, count:1", - " └─IndexLookUp 10000.00 root ", - " ├─IndexRangeScan(Build) 13375.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", - " └─Limit(Probe) 10000.00 cop[tikv] offset:0, count:1", - " └─Selection 10000.00 cop[tikv] gt(test.t2.b, test.t1.b)", - " └─TableRowIDScan 13375.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + "HashJoin 7984.01 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)], other cond:gt(test.t2.b, test.t1.b)", + "├─TableReader(Build) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9980.01 root data:Selection", + " └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -113,7 +108,7 @@ { "SQL": "select * from t1 where exists (select /*+ NO_DECORRELATE() */ 1 from t2 where t2.a = t1.a)", "Plan": [ - "Apply 10000.00 root CARTESIAN semi join, 
left side:TableReader", + "Apply 8000.00 root CARTESIAN semi join, left side:TableReader", "├─TableReader(Build) 10000.00 root data:TableFullScan", "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", "└─Limit(Probe) 10000.00 root offset:0, count:1", @@ -129,14 +124,15 @@ { "SQL": "select * from t1 where a in (select t2.a from t2 inner join t3 on t3.a = t2.b where t3.b > 0)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", - "├─HashJoin(Build) 4162.50 root inner join, equal:[eq(test.t3.a, test.t2.b)]", - "│ ├─TableReader(Build) 3330.00 root data:Selection", - "│ │ └─Selection 3330.00 cop[tikv] gt(test.t3.b, 0), not(isnull(test.t3.a))", - "│ │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", - "│ └─TableReader(Probe) 9980.01 root data:Selection", - "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "HashJoin 5203.12 root inner join, equal:[eq(test.t1.a, test.t2.a)]", + "├─HashAgg(Build) 4162.50 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─HashJoin 4162.50 root inner join, equal:[eq(test.t3.a, test.t2.b)]", + "│ ├─TableReader(Build) 3330.00 root data:Selection", + "│ │ └─Selection 3330.00 cop[tikv] gt(test.t3.b, 0), not(isnull(test.t3.a))", + "│ │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", + "│ └─TableReader(Probe) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a)), not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", "└─TableReader(Probe) 9990.00 root data:Selection", " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" @@ -150,13 +146,15 @@ "SQL": "select * from t1 where a in (select a from t2) order by a limit 
10", "Plan": [ "Limit 10.00 root offset:0, count:10", - "└─MergeJoin 10.00 root semi join, left side:Projection, left key:test.t1.a, right key:test.t2.a", - " ├─IndexReader(Build) 12.50 root index:IndexFullScan", - " │ └─IndexFullScan 12.50 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", - " └─Projection(Probe) 12.50 root test.t1.a, test.t1.b", - " └─IndexLookUp 12.50 root ", - " ├─IndexFullScan(Build) 12.50 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", - " └─TableRowIDScan(Probe) 12.50 cop[tikv] table:t1 keep order:false, stats:pseudo" + "└─MergeJoin 10.00 root inner join, left key:test.t1.a, right key:test.t2.a", + " ├─StreamAgg(Build) 8.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + " │ └─IndexReader 8.00 root index:StreamAgg", + " │ └─StreamAgg 8.00 cop[tikv] group by:test.t2.a, ", + " │ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + " └─Projection(Probe) 10.00 root test.t1.a, test.t1.b", + " └─IndexLookUp 10.00 root ", + " ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ "1 1", @@ -166,10 +164,12 @@ { "SQL": "select * from t1 where a in (select a from t2 where b > 1)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", - "├─TableReader(Build) 3330.00 root data:Selection", - "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", - "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "HashJoin 3330.00 root inner join, equal:[eq(test.t2.a, test.t1.a)]", + "├─HashAgg(Build) 2664.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─TableReader 2664.00 root data:HashAgg", + "│ └─HashAgg 2664.00 cop[tikv] group by:test.t2.a, ", + "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", + "│ 
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", "└─TableReader(Probe) 9990.00 root data:Selection", " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" @@ -182,7 +182,7 @@ { "SQL": "select * from t1 where a in (select a from t2 group by a)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "HashJoin 9990.00 root inner join, equal:[eq(test.t1.a, test.t2.a)]", "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", "│ └─IndexReader 7992.00 root index:StreamAgg", "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", @@ -199,7 +199,7 @@ { "SQL": "select * from t1 where a in (select a from t2 where b > 1 group by a)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + "HashJoin 3330.00 root inner join, equal:[eq(test.t2.a, test.t1.a)]", "├─HashAgg(Build) 2664.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", "│ └─TableReader 2664.00 root data:HashAgg", "│ └─HashAgg 2664.00 cop[tikv] group by:test.t2.a, ", @@ -217,16 +217,205 @@ { "SQL": "select * from t1 where a in (select a from t2 limit 10)", "Plan": [ - "HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", - "├─Selection(Build) 8.00 root not(isnull(test.t2.a))", - "│ └─Limit 10.00 root offset:0, count:10", - "│ └─IndexReader 10.00 root index:Limit", - "│ └─Limit 10.00 cop[tikv] offset:0, count:10", - "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "IndexHashJoin 10.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─HashAgg(Build) 8.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─Selection 8.00 root not(isnull(test.t2.a))", + "│ └─Limit 10.00 root offset:0, count:10", 
+ "│ └─IndexReader 10.00 root index:Limit", + "│ └─Limit 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 10.00 root ", + " ├─Selection(Build) 10.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 10.01 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where b = 1 and a in (select a from t2)", + "Plan": [ + "IndexJoin 9.99 root inner join, inner:StreamAgg, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + "├─TableReader(Build) 9.99 root data:Selection", + "│ └─Selection 9.99 cop[tikv] eq(test.t1.b, 1), not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─StreamAgg(Probe) 9.99 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + " └─IndexReader 9.99 root index:Selection", + " └─Selection 9.99 cop[tikv] not(isnull(test.t2.a))", + " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:true, stats:pseudo" + ], + "Result": [ + "1 1" + ] + }, + { + "SQL": "select * from t1 where b = 1 and exists (select 1 from t2 where t2.a = t1.a) limit 1", + "Plan": [ + "Limit 1.00 root offset:0, count:1", + "└─IndexHashJoin 1.00 root semi join, inner:IndexReader, left side:TableReader, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + " ├─TableReader(Build) 1.25 root data:Selection", + " │ └─Selection 1.25 cop[tikv] eq(test.t1.b, 1), not(isnull(test.t1.a))", + " │ └─TableFullScan 1251.25 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─IndexReader(Probe) 1.56 root index:Selection", + " └─Selection 1.56 cop[tikv] not(isnull(test.t2.a))", + " └─IndexRangeScan 
1.56 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "1 1" + ] + }, + { + "SQL": "select * from t1 where b = 1 and a not in (select a from t2) limit 1", + "Plan": [ + "Limit 1.00 root offset:0, count:1", + "└─HashJoin 1.00 root Null-aware anti semi join, left side:TableReader, equal:[eq(test.t1.a, test.t2.a)]", + " ├─IndexReader(Build) 10000.00 root index:IndexFullScan", + " │ └─IndexFullScan 10000.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", + " └─TableReader(Probe) 1.25 root data:Selection", + " └─Selection 1.25 cop[tikv] eq(test.t1.b, 1)", + " └─TableFullScan 1250.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": null + }, + { + "SQL": "select * from t1 where b = 1 and a in (select a from t2 where t2.b > 0) limit 1", + "Plan": [ + "Limit 1.00 root offset:0, count:1", + "└─IndexJoin 1.00 root inner join, inner:StreamAgg, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + " ├─TableReader(Build) 1.00 root data:Selection", + " │ └─Selection 1.00 cop[tikv] eq(test.t1.b, 1), not(isnull(test.t1.a))", + " │ └─TableFullScan 1001.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─StreamAgg(Probe) 1.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + " └─Projection 1.00 root test.t2.a, test.t2.b", + " └─IndexLookUp 1.00 root ", + " ├─Selection(Build) 3.00 cop[tikv] not(isnull(test.t2.a))", + " │ └─IndexRangeScan 3.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:true, stats:pseudo", + " └─Selection(Probe) 1.00 cop[tikv] gt(test.t2.b, 0)", + " └─TableRowIDScan 3.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1" + ] + } + ] + }, + { + "Name": "TestCorrelateWithCostFactors", + "Cases": [ + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a)", + "Plan": [ + "IndexHashJoin 7992.00 root semi join, 
inner:IndexReader, left side:TableReader, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexReader(Probe) 12487.50 root index:Selection", + " └─Selection 12487.50 cop[tikv] not(isnull(test.t2.a))", + " └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where not exists (select 1 from t2 where t2.a = t1.a)", + "Plan": [ + "IndexHashJoin 8000.00 root anti semi join, inner:IndexReader, left side:TableReader, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexReader(Probe) 12500.00 root index:IndexRangeScan", + " └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo" + ], + "Result": [ + "3 3" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2)", + "Plan": [ + "IndexHashJoin 9990.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─StreamAgg(Build) 7992.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─IndexReader 7992.00 root index:StreamAgg", + "│ └─StreamAgg 7992.00 cop[tikv] group by:test.t2.a, ", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─IndexLookUp(Probe) 9990.00 root ", + " ├─Selection(Build) 9990.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 10000.00 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep 
order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 9990.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", + "Plan": [ + "HashJoin 7992.00 root CARTESIAN semi join, left side:TableReader, other cond:gt(test.t2.a, test.t1.a)", + "├─IndexReader(Build) 9990.00 root index:IndexFullScan", + "│ └─IndexFullScan 9990.00 cop[tikv] table:t2, index:a(a) keep order:false, stats:pseudo", "└─TableReader(Probe) 9990.00 root data:Selection", " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], + "Result": [ + "1 1" + ] + }, + { + "SQL": "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", + "Plan": [ + "IndexHashJoin 7984.01 root semi join, inner:IndexLookUp, left side:TableReader, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a), other cond:gt(test.t2.b, test.t1.b)", + "├─TableReader(Build) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 12475.01 root ", + " ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))", + " │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " └─Selection(Probe) 12475.01 cop[tikv] not(isnull(test.t2.b))", + " └─TableRowIDScan 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2) order by a limit 10", + "Plan": [ + "Limit 10.00 root offset:0, count:10", + "└─IndexJoin 10.00 root inner join, inner:StreamAgg, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + " 
├─Projection(Build) 10.00 root test.t1.a, test.t1.b", + " │ └─IndexLookUp 10.00 root ", + " │ ├─IndexFullScan(Build) 10.00 cop[tikv] table:t1, index:a(a) keep order:true, stats:pseudo", + " │ └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─StreamAgg(Probe) 10.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + " └─IndexReader 10.00 root index:Selection", + " └─Selection 10.00 cop[tikv] not(isnull(test.t2.a))", + " └─IndexRangeScan 10.01 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:true, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 where b > 1)", + "Plan": [ + "IndexHashJoin 3330.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─HashAgg(Build) 2664.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─TableReader 2664.00 root data:HashAgg", + "│ └─HashAgg 2664.00 cop[tikv] group by:test.t2.a, ", + "│ └─Selection 3330.00 cop[tikv] gt(test.t2.b, 1), not(isnull(test.t2.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 3330.00 root ", + " ├─Selection(Build) 3330.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 3330.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], "Result": [ "1 1", "2 2" diff --git a/pkg/planner/core/core_init.go b/pkg/planner/core/core_init.go index 4d540a7a4a8ba..0c4888db7af92 100644 --- a/pkg/planner/core/core_init.go +++ b/pkg/planner/core/core_init.go @@ -30,7 +30,6 @@ func init() { // For code refactor init. 
utilfuncp.FindBestTask4BaseLogicalPlan = findBestTask utilfuncp.FindBestTask4LogicalDataSource = findBestTask4LogicalDataSource - utilfuncp.FindBestTask4LogicalJoin = findBestTask4LogicalJoin utilfuncp.ExhaustPhysicalPlans4LogicalJoin = exhaustPhysicalPlans4LogicalJoin utilfuncp.ExhaustPhysicalPlans4LogicalApply = exhaustPhysicalPlans4LogicalApply diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 58f0f88e68bad..95c61d1f1da13 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1082,10 +1082,10 @@ func (er *expressionRewriter) handleExistSubquery(ctx context.Context, planCtx * // Add LIMIT 1 when noDecorrelate is true for EXISTS subqueries to enable early exit corCols := coreusage.ExtractCorColumnsBySchema4LogicalPlan(np, planCtx.plan.Schema()) noDecorrelate := isNoDecorrelate(planCtx, corCols, hintFlags, handlingExistsSubquery) - // When EnableCorrelateSubquery is ON, prevent decorrelation of correlated - // subqueries so they stay as Apply with index lookups. + // When EnableCorrelateSubquery is ON (set by the correlate alternative round), + // prevent decorrelation of correlated subqueries so they stay as Apply with index lookups. 
if !noDecorrelate && len(corCols) > 0 { - b.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableCorrelateSubquery) + b.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableAlternativeLogicalPlans) if b.ctx.GetSessionVars().EnableCorrelateSubquery { noDecorrelate = true } @@ -1292,10 +1292,10 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp collFlag := collate.CompatibleCollate(lt.GetCollate(), rt.GetCollate()) corCols := coreusage.ExtractCorColumnsBySchema4LogicalPlan(np, planCtx.plan.Schema()) noDecorrelate := isNoDecorrelate(planCtx, corCols, hintFlags, handlingInSubquery) - // When EnableCorrelateSubquery is ON, prevent decorrelation of correlated - // IN subqueries so they stay as Apply with index lookups. + // When EnableCorrelateSubquery is ON (set by the correlate alternative round), + // prevent decorrelation of correlated IN subqueries so they stay as Apply with index lookups. if !noDecorrelate && len(corCols) > 0 && !v.Not { - planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableCorrelateSubquery) + planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableAlternativeLogicalPlans) if planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { noDecorrelate = true } @@ -1305,13 +1305,18 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp // and has no correlated column from the current level plan(if the correlated column is from upper level, // we can treat it as constant, because the upper LogicalApply cannot be eliminated since current node is a join node), // and don't need to append a scalar value, we can rewrite it to inner join. - // When EnableCorrelateSubquery is ON, skip the InnerJoin+Agg rewrite so that a SemiJoin is built - // instead; the CorrelateSolver rule can then convert it to a correlated Apply with index lookups. 
+ // When EnableCorrelateSubquery is ON (set by the correlate alternative round), skip the + // InnerJoin+Agg rewrite so that a SemiJoin is built instead; the CorrelateSolver rule can + // then convert it to a correlated Apply with index lookups. canRewriteToJoinAgg := planCtx.builder.ctx.GetSessionVars().GetAllowInSubqToJoinAndAgg() && !v.Not && !asScalar && len(corCols) == 0 && collFlag if canRewriteToJoinAgg { - // Record that the correlate variable is relevant — toggling it changes - // whether we take the InnerJoin+Agg path or the SemiApply path. - planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableCorrelateSubquery) + // Record that the alternative logical plans variable is relevant — toggling it + // changes whether we take the InnerJoin+Agg path or the SemiApply path. + planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableAlternativeLogicalPlans) + // Signal that a correlate alternative round is worth attempting. + if planCtx.builder.ctx.GetSessionVars().EnableAlternativeLogicalPlans { + planCtx.builder.ctx.GetSessionVars().StmtCtx.MarkAlternativeLogicalPlanPreferCorrelate() + } } if canRewriteToJoinAgg && !planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { // We need to try to eliminate the agg and the projection produced by this operation. @@ -1393,10 +1398,11 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp if er.err != nil { return v, true } - // When EnableCorrelateSubquery is ON and the subquery is non-correlated, - // mark the join so that CorrelateSolver converts it to a correlated Apply. + // When EnableCorrelateSubquery is ON (set by the correlate alternative round) + // and the subquery is non-correlated, mark the join so that CorrelateSolver + // converts it to a correlated Apply. 
if len(corCols) == 0 && !v.Not { - planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableCorrelateSubquery) + planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableAlternativeLogicalPlans) if planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { if ap, ok := planCtx.plan.(*logicalop.LogicalApply); ok { ap.PreferCorrelate = true diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index 477849385d0a8..cf3d1722d4493 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -3123,109 +3123,6 @@ func ExhaustPhysicalPlans4MockLogicalPlan(p *mockLogicalPlan4Test, prop *propert return append(plan1, plan2...), true, nil } -// findBestTask4LogicalJoin handles LogicalJoin nodes that have a CorrelateAlternative. -// It compares the Join path and the Apply path via CBO and returns the cheaper one. -// If any panic occurs, it falls back to the standard findBestTask behavior. -func findBestTask4LogicalJoin(super base.LogicalPlan, prop *property.PhysicalProperty) (bestTask base.Task, retErr error) { - defer func() { - if r := recover(); r != nil { - logutil.BgLogger().Warn("findBestTask4LogicalJoin panic, falling back to join task", - zap.Any("recover", r), - zap.Stack("stack")) - // Disable the correlate alternative so future calls use the standard path. - _, self := getGEAndSelf(super) - join := self.(*logicalop.LogicalJoin) - join.CorrelateAlternative = nil - // Return the cached join task if available (from step 1), otherwise invalid. - // Do NOT retry findBestTask here — the Apply alternative may have - // left partial state, making a retry unsafe. 
- p := self.GetBaseLogicalPlan().(*logicalop.BaseLogicalPlan) - if cached := p.GetTask(prop); cached != nil { - bestTask = cached - retErr = nil - } else { - bestTask = base.InvalidTask - retErr = nil - } - } - }() - - _, self := getGEAndSelf(super) - join := self.(*logicalop.LogicalJoin) - p := self.GetBaseLogicalPlan().(*logicalop.BaseLogicalPlan) - - if prop == nil { - return nil, nil - } - - // Cache check: if winner was already computed for this prop, return it. - if cached := p.GetTask(prop); cached != nil { - return cached, nil - } - - // Step 1: Get the Join path's best task (caches on p internally). - joinTask, err := findBestTask(super, prop) - if err != nil { - return nil, err - } - - // If the user specified explicit join hints (e.g., HASH_JOIN, INL_JOIN), - // respect them and skip the Apply alternative. Hints are a user override - // that should not be silently overridden by the correlate optimization. - if join.PreferJoinType > 0 { - return joinTask, nil - } - - // Step 2: Try the Apply alternative. The Apply path may encounter issues - // (e.g., unsupported operator types in the inner subtree, correlated - // conditions that confuse the ranger, etc.). Use a recovery mechanism to - // ensure the Join path is always available as a safe fallback. - applyTask := tryCorrelateAlternative(join, prop) - - // Step 3: Compare and cache the winner. - bestTask = joinTask - if applyTask != nil { - if applyIsBetter, err := compareTaskCost(applyTask, joinTask); err != nil { - return nil, err - } else if applyIsBetter { - bestTask = applyTask - } - } - - // Overwrite cache with winner (findBestTask cached joinTask; overwrite if apply won). - p.StoreTask(prop, bestTask) - return bestTask, nil -} - -// tryCorrelateAlternative evaluates the Apply alternative for a LogicalJoin. -// Returns the Apply task on success, or nil if the Apply path fails for any reason. 
-func tryCorrelateAlternative(join *logicalop.LogicalJoin, prop *property.PhysicalProperty) (result base.Task) { - defer func() { - if r := recover(); r != nil { - logutil.BgLogger().Warn("correlate alternative failed, falling back to join", - zap.Any("recover", r), - zap.Stack("stack")) - result = nil - } - }() - - ap := join.CorrelateAlternative - - // Derive stats — inner child needs fresh stats (cleared by resetStatsForCorrelatedDS). - if _, _, err := ap.RecursiveDeriveStats(nil); err != nil { - return nil - } - preparePossibleProperties(ap) - - // Get the Apply path's best task. - applyTask, err := physicalop.FindBestTask(ap, prop) - if err != nil { - return nil - } - - return applyTask -} - type mockPhysicalPlan4Test struct { physicalop.BasePhysicalPlan // 1 or 2 for physicalPlan1 or physicalPlan2. diff --git a/pkg/planner/core/operator/logicalop/logical_join.go b/pkg/planner/core/operator/logicalop/logical_join.go index 50f26854eac2a..534df5922dbcd 100644 --- a/pkg/planner/core/operator/logicalop/logical_join.go +++ b/pkg/planner/core/operator/logicalop/logical_join.go @@ -101,14 +101,10 @@ type LogicalJoin struct { RedundantColsToOutputIdx map[int64]int // PreferCorrelate is set to true when this SemiJoin originated from a non-correlated - // IN subquery with EnableCorrelateSubquery=ON, indicating that the CorrelateSolver + // IN subquery during the correlate alternative round, indicating that the CorrelateSolver // should convert it back to a correlated Apply with index lookups. PreferCorrelate bool - // CorrelateAlternative holds the LogicalApply alternative built by CorrelateSolver. - // When non-nil, findBestTask compares both Join and Apply paths and picks the cheaper one. - CorrelateAlternative *LogicalApply - // EqualCondOutCnt indicates the estimated count of joined rows after evaluating `EqualConditions`. 
EqualCondOutCnt float64 diff --git a/pkg/planner/core/operator/physicalop/base_physical_plan.go b/pkg/planner/core/operator/physicalop/base_physical_plan.go index c6cb683939b9a..6fb5c3e77ba99 100644 --- a/pkg/planner/core/operator/physicalop/base_physical_plan.go +++ b/pkg/planner/core/operator/physicalop/base_physical_plan.go @@ -492,11 +492,6 @@ func FindBestTask(e base.LogicalPlan, prop *property.PhysicalProperty) (bestTask return findBestTask4LogicalShowDDLJobs(e, prop) case *logicalop.MockDataSource: return findBestTask4LogicalMockDatasource(lop, prop) - case *logicalop.LogicalJoin: - if lop.CorrelateAlternative != nil { - return utilfuncp.FindBestTask4LogicalJoin(e, prop) - } - return utilfuncp.FindBestTask4BaseLogicalPlan(e, prop) default: return utilfuncp.FindBestTask4BaseLogicalPlan(e, prop) } diff --git a/pkg/planner/core/optimizer.go b/pkg/planner/core/optimizer.go index f44d871e42cd3..92864407679a5 100644 --- a/pkg/planner/core/optimizer.go +++ b/pkg/planner/core/optimizer.go @@ -361,9 +361,8 @@ func adjustOptimizationFlags(flag uint64, logic base.LogicalPlan) uint64 { if !logic.SCtx().GetSessionVars().StmtCtx.UseDynamicPruneMode { flag |= rule.FlagPartitionProcessor // apply partition pruning under static mode } - if logic.SCtx().GetSessionVars().EnableCorrelateSubquery { - flag |= rule.FlagCorrelate - } + // FlagCorrelate is added by the correlate alternative round's flag adjuster, + // not here. EnableCorrelateSubquery is an internal flag toggled by the round. // Recompute FlagPruneColumnsAgain after all conditional flag mutations so // that conditionally-added flags (FlagCorrelate, FlagPartitionProcessor, …) // are taken into account. 
A second column-prune pass is worthwhile when diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index b21e2bae747c4..0206684d29888 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -207,10 +207,10 @@ func (s *CorrelateSolver) correlate(ctx context.Context, p base.LogicalPlan) (ba ap.SetSchema(join.Schema().Clone()) ap.SetOutputNames(join.OutputNames()) - // Store the Apply alternative on the Join for cost-based selection during - // physical optimization, rather than unconditionally choosing Apply. - join.CorrelateAlternative = ap - return p, true, nil + // Replace the Join with the Apply. In the alternative logical plans framework, + // this round produces a complete plan; the top-level cost comparison across + // rounds selects the winner. + return ap, true, nil } // buildCorrelatedCond converts an equal condition from the join into a correlated condition @@ -339,9 +339,8 @@ func cloneJoin(j *logicalop.LogicalJoin) (*logicalop.LogicalJoin, bool) { clone.LeftConditions = append(expression.CNFExprs(nil), j.LeftConditions...) clone.RightConditions = append(expression.CNFExprs(nil), j.RightConditions...) clone.OtherConditions = append(expression.CNFExprs(nil), j.OtherConditions...) - // Clear correlate state. The alternative was built for the original join's - // children; retaining it would point physical planning at uncloned nodes. - clone.CorrelateAlternative = nil + // Clear PreferCorrelate on cloned inner joins to prevent CorrelateSolver + // from processing nested semi-joins in the cloned subtree. clone.PreferCorrelate = false clone.SetChildren(children...) 
return &clone, true diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index a0ab91aa20ed0..bd65014dbfc5b 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -567,16 +567,37 @@ func shouldTryOrderAwareReorderRound(sessVars *variable.SessionVars) bool { sessVars.StmtCtx.AlternativeLogicalPlanOrderAwareJoinReorder } -type flagAdjustFunc func(uint64) uint64 +func shouldTryCorrelateRound(sessVars *variable.SessionVars) bool { + return sessVars.EnableAlternativeLogicalPlans && + sessVars.StmtCtx.AlternativeLogicalPlanPreferCorrelate +} -var roundList = [...]flagAdjustFunc{ - func(flag uint64) uint64 { return flag &^ rule.FlagDecorrelate }, - func(flag uint64) uint64 { return flag | rule.FlagOrderAwareJoinReorder }, +// alternativeRound describes one alternative logical-plan round. +// adjustFlag adjusts the optimization flags for the round. +// enabled returns true when the round should be attempted. +// setup/cleanup optionally modify session state before/after plan building. 
+type alternativeRound struct { + adjustFlag func(uint64) uint64 + enabled func(*variable.SessionVars) bool + setup func(*variable.SessionVars) + cleanup func(*variable.SessionVars) } -var roundEnabled = [...]func(*variable.SessionVars) bool{ - shouldTryNonDecorrelationRound, - shouldTryOrderAwareReorderRound, +var alternativeRounds = [...]alternativeRound{ + { + adjustFlag: func(flag uint64) uint64 { return flag &^ rule.FlagDecorrelate }, + enabled: shouldTryNonDecorrelationRound, + }, + { + adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagOrderAwareJoinReorder }, + enabled: shouldTryOrderAwareReorderRound, + }, + { + adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagCorrelate }, + enabled: shouldTryCorrelateRound, + setup: func(sv *variable.SessionVars) { sv.EnableCorrelateSubquery = true }, + cleanup: func(sv *variable.SessionVars) { sv.EnableCorrelateSubquery = false }, + }, } func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW, is infoschema.InfoSchema) (base.Plan, types.NameSlice, float64, error) { @@ -648,8 +669,8 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW return p, names, 0, nil } - for i, adjust := range roundList { - if !roundEnabled[i](sessVars) { + for _, round := range alternativeRounds { + if !round.enabled(sessVars) { continue } restoreLogicalPlanBuildCtx(sessVars, initialLogicalPlanCtx) @@ -659,6 +680,9 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW } }) + if round.setup != nil { + round.setup(sessVars) + } p, names, nonLogical, err = buildAndOptimizeLogicalPlanRound( ctx, sctx, @@ -673,8 +697,11 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW &bestNames, &bestCost, &bestLogicalPlanCtx, - adjust, + round.adjustFlag, ) + if round.cleanup != nil { + round.cleanup(sessVars) + } if err != nil { return nil, nil, 0, err } diff --git a/pkg/planner/util/utilfuncp/func_pointer_misc.go 
b/pkg/planner/util/utilfuncp/func_pointer_misc.go index ccd77fa700de7..e80ac5585d98e 100644 --- a/pkg/planner/util/utilfuncp/func_pointer_misc.go +++ b/pkg/planner/util/utilfuncp/func_pointer_misc.go @@ -42,10 +42,6 @@ var FindBestTask4BaseLogicalPlan func(p base.LogicalPlan, var FindBestTask4LogicalDataSource func(lp base.LogicalPlan, prop *property.PhysicalProperty) (t base.Task, err error) -// FindBestTask4LogicalJoin handles LogicalJoin with a correlate alternative. -var FindBestTask4LogicalJoin func(p base.LogicalPlan, - prop *property.PhysicalProperty) (bestTask base.Task, err error) - // ExhaustPhysicalPlans4LogicalJoin will be called by LogicalJoin in logicalOp pkg. var ExhaustPhysicalPlans4LogicalJoin func(lp base.LogicalPlan, prop *property.PhysicalProperty) ( []base.PhysicalPlan, bool, error) diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index 4d49a14b50928..57f86d15a481e 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -482,6 +482,10 @@ type StatementContext struct { // logical build round produced an order-aware join reorder candidate that is // worth exploring in a dedicated alternative round. AlternativeLogicalPlanOrderAwareJoinReorder bool + // AlternativeLogicalPlanPreferCorrelate indicates whether the current logical + // build round encountered a non-correlated IN subquery eligible for the + // correlate-to-Apply alternative. + AlternativeLogicalPlanPreferCorrelate bool // IsExplainAnalyzeDML is true if the statement is "explain analyze DML executors", before responding the explain // results to the client, the transaction should be committed first. See issue #37373 for more details. 
@@ -662,6 +666,7 @@ func (sc *StatementContext) ResetAlternativeLogicalPlanSignals() { sc.AlternativeLogicalPlanDecorrelatedApply = false sc.AlternativeLogicalPlanSameOrderIndexJoin = false sc.AlternativeLogicalPlanOrderAwareJoinReorder = false + sc.AlternativeLogicalPlanPreferCorrelate = false } // MarkAlternativeLogicalPlanDecorrelatedApply records that at least one Apply has @@ -682,6 +687,13 @@ func (sc *StatementContext) MarkAlternativeLogicalPlanOrderAwareJoinReorder() { sc.AlternativeLogicalPlanOrderAwareJoinReorder = true } +// MarkAlternativeLogicalPlanPreferCorrelate records that the current logical +// build round encountered a non-correlated IN subquery that is eligible for +// the correlate-to-Apply alternative. +func (sc *StatementContext) MarkAlternativeLogicalPlanPreferCorrelate() { + sc.AlternativeLogicalPlanPreferCorrelate = true +} + // CtxID returns the context id of the statement func (sc *StatementContext) CtxID() uint64 { return sc.ctxID diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index bb08e6045e03d..fbd7e5ba29995 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -343,10 +343,6 @@ const ( // inner-join with aggregation (equivalent to SEMI_JOIN_REWRITE() hint). TiDBOptEnableSemiJoinRewrite = "tidb_opt_enable_semi_join_rewrite" - // TiDBOptEnableCorrelateSubquery controls conversion of non-correlated semi-joins - // back to correlated Apply (reverse of decorrelation). - TiDBOptEnableCorrelateSubquery = "tidb_opt_enable_correlate_subquery" - // TiDBOptEnableCorrelationAdjustment is used to indicates if enable correlation adjustment. 
TiDBOptEnableCorrelationAdjustment = "tidb_opt_enable_correlation_adjustment" @@ -1471,7 +1467,6 @@ const ( DefOptEnableNoDecorrelateInSelect = false DefOptEnableAlternativeLogicalPlans = false DefOptEnableSemiJoinRewrite = false - DefOptEnableCorrelateSubquery = false DefBatchInsert = false DefBatchDelete = false DefBatchCommit = false diff --git a/pkg/sessionctx/variable/session.go b/pkg/sessionctx/variable/session.go index 9780e1b52deea..bc7aca51af96c 100644 --- a/pkg/sessionctx/variable/session.go +++ b/pkg/sessionctx/variable/session.go @@ -1217,7 +1217,9 @@ type SessionVars struct { // EnableSemiJoinRewrite enables the SEMI_JOIN_REWRITE hint for subqueries in the where clause. EnableSemiJoinRewrite bool - // EnableCorrelateSubquery enables conversion of non-correlated semi-joins to correlated Apply. + // EnableCorrelateSubquery is an internal flag (not user-facing) toggled by the + // correlate alternative round to enable conversion of non-correlated semi-joins + // to correlated Apply during plan building. EnableCorrelateSubquery bool // AllowProjectionPushDown enables pushdown projection on TiKV. 
@@ -2347,7 +2349,6 @@ func NewSessionVars(hctx HookContext) *SessionVars { EnableNoDecorrelateInSelect: vardef.DefOptEnableNoDecorrelateInSelect, EnableAlternativeLogicalPlans: vardef.DefOptEnableAlternativeLogicalPlans, EnableSemiJoinRewrite: vardef.DefOptEnableSemiJoinRewrite, - EnableCorrelateSubquery: vardef.DefOptEnableCorrelateSubquery, RetryLimit: vardef.DefTiDBRetryLimit, DisableTxnAutoRetry: vardef.DefTiDBDisableTxnAutoRetry, DDLReorgPriority: kv.PriorityLow, diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index 98cf58ecfc34f..e8ffc994bb9a2 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -342,10 +342,6 @@ var defaultSysVars = []*SysVar{ s.EnableSemiJoinRewrite = TiDBOptOn(val) return nil }}, - {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptEnableCorrelateSubquery, Value: BoolToOnOff(vardef.DefOptEnableCorrelateSubquery), Type: vardef.TypeBool, SetSession: func(s *SessionVars, val string) error { - s.EnableCorrelateSubquery = TiDBOptOn(val) - return nil - }}, {Scope: vardef.ScopeSession, Name: vardef.TiDBDDLReorgPriority, Value: "PRIORITY_LOW", Type: vardef.TypeEnum, skipInit: true, PossibleValues: []string{"PRIORITY_LOW", "PRIORITY_NORMAL", "PRIORITY_HIGH"}, SetSession: func(s *SessionVars, val string) error { s.setDDLReorgPriority(val) return nil From 261c6e82cb6d994912312dee71cc6bbdd1d8ec20 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 6 Apr 2026 15:41:48 -0700 Subject: [PATCH 14/24] build error --- pkg/planner/core/optimizer_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/planner/core/optimizer_test.go b/pkg/planner/core/optimizer_test.go index a68dec4d4b671..36a2891d5e2ac 100644 --- a/pkg/planner/core/optimizer_test.go +++ b/pkg/planner/core/optimizer_test.go @@ -15,6 +15,7 @@ package core import ( + "math" "math/bits" "reflect" "strings" From b4b1be2993b8efeaf083ce18e3001bdc66271d0b Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 6 Apr 2026 
17:37:31 -0700 Subject: [PATCH 15/24] review comments after refactor --- pkg/bindinfo/binding_plan_generation.go | 6 ++- .../core/casetest/rule/rule_correlate_test.go | 28 +++++++++++-- .../rule/testdata/correlate_suite_in.json | 4 +- .../rule/testdata/correlate_suite_out.json | 40 +++++++++++++++++++ .../rule/testdata/correlate_suite_xut.json | 40 +++++++++++++++++++ pkg/planner/core/rule_correlate.go | 31 ++++++++++++++ 6 files changed, 143 insertions(+), 6 deletions(-) diff --git a/pkg/bindinfo/binding_plan_generation.go b/pkg/bindinfo/binding_plan_generation.go index 802e5af35be9f..c672f6f96ebd0 100644 --- a/pkg/bindinfo/binding_plan_generation.go +++ b/pkg/bindinfo/binding_plan_generation.go @@ -439,6 +439,8 @@ func genPlanUnderState(sctx sessionctx.Context, stmt ast.StmtNode, state *state) sctx.GetSessionVars().EnableSemiJoinRewrite = state.varValues[i].(bool) case vardef.TiDBOptSelectivityFactor: sctx.GetSessionVars().SelectivityFactor = state.varValues[i].(float64) + case vardef.TiDBOptEnableAlternativeLogicalPlans: + sctx.GetSessionVars().EnableAlternativeLogicalPlans = state.varValues[i].(bool) default: return nil, fmt.Errorf("unsupported variable %s in plan generation", varName) } @@ -547,7 +549,7 @@ func adjustVar(varName string, varVal any) (newVarVal any, err error) { } // increase 0.1 each step return v + 0.1, nil - case vardef.TiDBOptPreferRangeScan, vardef.TiDBOptEnableNoDecorrelateInSelect, vardef.TiDBOptAlwaysKeepJoinKey, vardef.TiDBOptEnableSemiJoinRewrite: // flip the switch + case vardef.TiDBOptPreferRangeScan, vardef.TiDBOptEnableNoDecorrelateInSelect, vardef.TiDBOptAlwaysKeepJoinKey, vardef.TiDBOptEnableSemiJoinRewrite, vardef.TiDBOptEnableAlternativeLogicalPlans: // flip the switch return !varVal.(bool), nil } return nil, fmt.Errorf("unsupported variable %s in plan generation", varName) @@ -640,6 +642,8 @@ func getStartState(vars []string, fixes []uint64, indexHintCount int) (*state, e s.varValues = append(s.varValues, 
vardef.DefOptSelectivityFactor) case vardef.TiDBOptCartesianJoinOrderThreshold: s.varValues = append(s.varValues, vardef.DefOptCartesianJoinOrderThreshold) + case vardef.TiDBOptEnableAlternativeLogicalPlans: + s.varValues = append(s.varValues, vardef.DefOptEnableAlternativeLogicalPlans) default: return nil, fmt.Errorf("unsupported variable %s in plan generation", varName) } diff --git a/pkg/planner/core/casetest/rule/rule_correlate_test.go b/pkg/planner/core/casetest/rule/rule_correlate_test.go index c027b91210eb5..f510ad4987de4 100644 --- a/pkg/planner/core/casetest/rule/rule_correlate_test.go +++ b/pkg/planner/core/casetest/rule/rule_correlate_test.go @@ -77,14 +77,14 @@ func TestCorrelateAlternativeChoosesApply(t *testing.T) { // Without alternative plans: standard InnerJoin+Agg path produces IndexJoin. tk.MustExec("set tidb_opt_enable_alternative_logical_plans = OFF") rows := tk.MustQuery("explain format = 'brief' " + sql).Rows() - require.True(t, strings.Contains(rows[0][0].(string), "IndexJoin"), - "without alternative plans, expected IndexJoin, got: %s", rows[0][0]) + require.True(t, explainContains(rows, "IndexJoin"), + "without alternative plans, expected IndexJoin in plan:\n%s", joinExplainRows(rows)) // With alternative plans: correlate round produces Apply (cheaper than IndexJoin+StreamAgg). tk.MustExec("set tidb_opt_enable_alternative_logical_plans = ON") rows = tk.MustQuery("explain format = 'brief' " + sql).Rows() - require.True(t, strings.Contains(rows[0][0].(string), "Apply"), - "with alternative plans, expected Apply, got: %s", rows[0][0]) + require.True(t, explainContains(rows, "Apply"), + "with alternative plans, expected Apply in plan:\n%s", joinExplainRows(rows)) // Verify correct results in both modes. tk.MustExec("set tidb_opt_enable_alternative_logical_plans = OFF") @@ -127,6 +127,26 @@ func TestCorrelate(tt *testing.T) { }) } +// explainContains scans all explain rows for a substring in the operator column. 
+func explainContains(rows [][]any, substr string) bool { + for _, row := range rows { + if strings.Contains(row[0].(string), substr) { + return true + } + } + return false +} + +// joinExplainRows formats explain rows into a single string for debug output. +func joinExplainRows(rows [][]any) string { + var sb strings.Builder + for _, row := range rows { + sb.WriteString(row[0].(string)) + sb.WriteByte('\n') + } + return sb.String() +} + // TestCorrelateWithCostFactors verifies that when hash/merge join cost factors // are increased, the correlate alternative round wins and produces Apply-based // plans with correlated index access for cases that normally choose HashJoin. diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json index d974904bc062b..b336f9a8cee93 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_in.json @@ -16,6 +16,7 @@ "select * from t1 where a in (select a from t2 group by a)", "select * from t1 where a in (select a from t2 where b > 1 group by a)", "select * from t1 where a in (select a from t2 limit 10)", + "select * from t1 where a in (select a from t2 order by a limit 10)", "select * from t1 where b = 1 and a in (select a from t2)", "select * from t1 where b = 1 and exists (select 1 from t2 where t2.a = t1.a) limit 1", "select * from t1 where b = 1 and a not in (select a from t2) limit 1", @@ -31,7 +32,8 @@ "select * from t1 where exists (select 1 from t2 where t2.a > t1.a)", "select * from t1 where exists (select 1 from t2 where t2.a = t1.a and t2.b > t1.b)", "select * from t1 where a in (select a from t2) order by a limit 10", - "select * from t1 where a in (select a from t2 where b > 1)" + "select * from t1 where a in (select a from t2 where b > 1)", + "select * from t1 where a in (select a from t2 order by a limit 10)" ] } ] diff --git 
a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json index c9be9de330595..ee0fe604da9fb 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_out.json @@ -234,6 +234,26 @@ "2 2" ] }, + { + "SQL": "select * from t1 where a in (select a from t2 order by a limit 10)", + "Plan": [ + "IndexHashJoin 10.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─StreamAgg(Build) 8.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─Selection 8.00 root not(isnull(test.t2.a))", + "│ └─Limit 10.00 root offset:0, count:10", + "│ └─IndexReader 10.00 root index:Limit", + "│ └─Limit 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─IndexLookUp(Probe) 10.00 root ", + " ├─Selection(Build) 10.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 10.01 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, { "SQL": "select * from t1 where b = 1 and a in (select a from t2)", "Plan": [ @@ -420,6 +440,26 @@ "1 1", "2 2" ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 order by a limit 10)", + "Plan": [ + "IndexHashJoin 10.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─StreamAgg(Build) 8.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─Selection 8.00 root not(isnull(test.t2.a))", + "│ └─Limit 10.00 root offset:0, count:10", + "│ └─IndexReader 10.00 root index:Limit", + "│ └─Limit 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexFullScan 10.00 cop[tikv] 
table:t2, index:a(a) keep order:true, stats:pseudo", + "└─IndexLookUp(Probe) 10.00 root ", + " ├─Selection(Build) 10.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 10.01 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] } ] } diff --git a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json index c9be9de330595..ee0fe604da9fb 100644 --- a/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json +++ b/pkg/planner/core/casetest/rule/testdata/correlate_suite_xut.json @@ -234,6 +234,26 @@ "2 2" ] }, + { + "SQL": "select * from t1 where a in (select a from t2 order by a limit 10)", + "Plan": [ + "IndexHashJoin 10.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─StreamAgg(Build) 8.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─Selection 8.00 root not(isnull(test.t2.a))", + "│ └─Limit 10.00 root offset:0, count:10", + "│ └─IndexReader 10.00 root index:Limit", + "│ └─Limit 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─IndexLookUp(Probe) 10.00 root ", + " ├─Selection(Build) 10.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 10.01 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] + }, { "SQL": "select * from t1 where b = 1 and a in (select a from t2)", "Plan": [ @@ -420,6 +440,26 @@ "1 1", "2 2" ] + }, + { + "SQL": "select * from t1 where a in (select a from t2 order by a limit 10)", + "Plan": [ + "IndexHashJoin 
10.00 root inner join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─StreamAgg(Build) 8.00 root group by:test.t2.a, funcs:firstrow(test.t2.a)->test.t2.a", + "│ └─Selection 8.00 root not(isnull(test.t2.a))", + "│ └─Limit 10.00 root offset:0, count:10", + "│ └─IndexReader 10.00 root index:Limit", + "│ └─Limit 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexFullScan 10.00 cop[tikv] table:t2, index:a(a) keep order:true, stats:pseudo", + "└─IndexLookUp(Probe) 10.00 root ", + " ├─Selection(Build) 10.00 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 10.01 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1 1", + "2 2" + ] } ] } diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index 0206684d29888..07193522de012 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -276,6 +276,10 @@ func cloneLogicalSubtree(p base.LogicalPlan) (base.LogicalPlan, bool) { return cloneAggregation(op) case *logicalop.LogicalLimit: return cloneLimit(op) + case *logicalop.LogicalSort: + return cloneSort(op) + case *logicalop.LogicalTopN: + return cloneTopN(op) default: // Unknown operator type — cannot safely clone. Return failure // so the caller aborts the correlate optimization. @@ -398,6 +402,33 @@ func cloneLimit(lim *logicalop.LogicalLimit) (*logicalop.LogicalLimit, bool) { return &clone, true } +func cloneSort(s *logicalop.LogicalSort) (*logicalop.LogicalSort, bool) { + children, ok := cloneWithChildren(s) + if !ok { + return nil, false + } + clone := *s + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + s.SCtx(), s.TP(), &clone, s.QueryBlockOffset()) + clone.ByItems = append([]*util.ByItems(nil), s.ByItems...) + clone.SetChildren(children...) 
+ return &clone, true +} + +func cloneTopN(tn *logicalop.LogicalTopN) (*logicalop.LogicalTopN, bool) { + children, ok := cloneWithChildren(tn) + if !ok { + return nil, false + } + clone := *tn + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + tn.SCtx(), tn.TP(), &clone, tn.QueryBlockOffset()) + clone.SetSchema(tn.Schema().Clone()) + clone.ByItems = append([]*util.ByItems(nil), tn.ByItems...) + clone.SetChildren(children...) + return &clone, true +} + // freshAccessPath creates a new AccessPath with only the structural identity // fields from the source path (Index, StoreType, handle flags, hint flags). // Analysis fields (Ranges, AccessConds, IdxCols, etc.) are left at zero so From 88bfc9b509db16b620199dc8382b99f5d99da412 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 6 Apr 2026 19:00:33 -0700 Subject: [PATCH 16/24] claude review --- pkg/planner/core/rule_correlate.go | 13 +++++++++ pkg/planner/optimize.go | 47 ++++++++++++++++-------------- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index 07193522de012..9b817745c5b8b 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -18,9 +18,11 @@ import ( "context" "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/expression/aggregation" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/planner/core/base" "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" + "github.com/pingcap/tidb/pkg/planner/property" "github.com/pingcap/tidb/pkg/planner/util" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/logutil" @@ -372,6 +374,7 @@ func cloneProjection(proj *logicalop.LogicalProjection) (*logicalop.LogicalProje clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( proj.SCtx(), proj.TP(), &clone, proj.QueryBlockOffset()) clone.SetSchema(proj.Schema().Clone()) + clone.Exprs = append([]expression.Expression(nil), proj.Exprs...) 
clone.SetChildren(children...) return &clone, true } @@ -385,6 +388,8 @@ func cloneAggregation(agg *logicalop.LogicalAggregation) (*logicalop.LogicalAggr clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( agg.SCtx(), agg.TP(), &clone, agg.QueryBlockOffset()) clone.SetSchema(agg.Schema().Clone()) + clone.AggFuncs = append([]*aggregation.AggFuncDesc(nil), agg.AggFuncs...) + clone.GroupByItems = append([]expression.Expression(nil), agg.GroupByItems...) clone.SetChildren(children...) return &clone, true } @@ -398,6 +403,9 @@ func cloneLimit(lim *logicalop.LogicalLimit) (*logicalop.LogicalLimit, bool) { clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( lim.SCtx(), lim.TP(), &clone, lim.QueryBlockOffset()) clone.SetSchema(lim.Schema().Clone()) + if len(lim.PartitionBy) > 0 { + clone.PartitionBy = append([]property.SortItem(nil), lim.PartitionBy...) + } clone.SetChildren(children...) return &clone, true } @@ -410,6 +418,8 @@ func cloneSort(s *logicalop.LogicalSort) (*logicalop.LogicalSort, bool) { clone := *s clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( s.SCtx(), s.TP(), &clone, s.QueryBlockOffset()) + // LogicalSort embeds BaseLogicalPlan (not LogicalSchemaProducer), + // so it inherits schema from its child — no SetSchema needed. clone.ByItems = append([]*util.ByItems(nil), s.ByItems...) clone.SetChildren(children...) return &clone, true @@ -425,6 +435,9 @@ func cloneTopN(tn *logicalop.LogicalTopN) (*logicalop.LogicalTopN, bool) { tn.SCtx(), tn.TP(), &clone, tn.QueryBlockOffset()) clone.SetSchema(tn.Schema().Clone()) clone.ByItems = append([]*util.ByItems(nil), tn.ByItems...) + if len(tn.PartitionBy) > 0 { + clone.PartitionBy = append([]property.SortItem(nil), tn.PartitionBy...) + } clone.SetChildren(children...) 
return &clone, true } diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index bd65014dbfc5b..3e9156db50304 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -680,28 +680,31 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW } }) - if round.setup != nil { - round.setup(sessVars) - } - p, names, nonLogical, err = buildAndOptimizeLogicalPlanRound( - ctx, - sctx, - node, - is, - hintProcessor, - &checked, - &optimizeStarted, - &beginOpt, - needRestoreLogicalPlanCtx, - &bestPlan, - &bestNames, - &bestCost, - &bestLogicalPlanCtx, - round.adjustFlag, - ) - if round.cleanup != nil { - round.cleanup(sessVars) - } + // Use a closure so that defer-based cleanup runs at the end of each + // iteration, not at function exit. This ensures session state (e.g. + // EnableCorrelateSubquery) is restored even if the round panics. + func() { + if round.setup != nil { + round.setup(sessVars) + defer round.cleanup(sessVars) + } + p, names, nonLogical, err = buildAndOptimizeLogicalPlanRound( + ctx, + sctx, + node, + is, + hintProcessor, + &checked, + &optimizeStarted, + &beginOpt, + needRestoreLogicalPlanCtx, + &bestPlan, + &bestNames, + &bestCost, + &bestLogicalPlanCtx, + round.adjustFlag, + ) + }() if err != nil { return nil, nil, 0, err } From 9e14702cddd26ee035ea7c4571cdb5d9c2058fc1 Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 7 Apr 2026 15:43:02 -0700 Subject: [PATCH 17/24] add parallel apply --- pkg/planner/BUILD.bazel | 2 + pkg/planner/core/casetest/rule/BUILD.bazel | 2 +- .../core/casetest/rule/rule_correlate_test.go | 62 +++++++++++++++++++ pkg/planner/core/expression_rewriter.go | 7 ++- pkg/planner/core/rule_correlate.go | 7 ++- pkg/planner/optimize.go | 21 ++++++- 6 files changed, 92 insertions(+), 9 deletions(-) diff --git a/pkg/planner/BUILD.bazel b/pkg/planner/BUILD.bazel index 5a9830db679bc..a0adbf6a9ef97 100644 --- a/pkg/planner/BUILD.bazel +++ b/pkg/planner/BUILD.bazel @@ -29,9 +29,11 @@ go_library( 
"//pkg/util/dbterror/plannererrors", "//pkg/util/hint", "//pkg/util/intest", + "//pkg/util/logutil", "//pkg/util/topsql", "//pkg/util/tracing", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", + "@org_uber_go_zap//:zap", ], ) diff --git a/pkg/planner/core/casetest/rule/BUILD.bazel b/pkg/planner/core/casetest/rule/BUILD.bazel index b501535d61803..f1fc117d0b206 100644 --- a/pkg/planner/core/casetest/rule/BUILD.bazel +++ b/pkg/planner/core/casetest/rule/BUILD.bazel @@ -21,7 +21,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 34, + shard_count = 35, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/core/casetest/rule/rule_correlate_test.go b/pkg/planner/core/casetest/rule/rule_correlate_test.go index f510ad4987de4..0c3a1b61d2f0c 100644 --- a/pkg/planner/core/casetest/rule/rule_correlate_test.go +++ b/pkg/planner/core/casetest/rule/rule_correlate_test.go @@ -15,6 +15,7 @@ package rule import ( + "fmt" "strings" "testing" @@ -147,6 +148,67 @@ func joinExplainRows(rows [][]any) string { return sb.String() } +// TestCorrelateParallelApply verifies that when the correlate alternative round +// produces an Apply plan and tidb_enable_parallel_apply is ON, the Apply is +// executed with parallel concurrency. This tests the interaction between the +// correlate optimization (converting decorrelated semi-join back to Apply) and +// the parallel apply executor. 
+func TestCorrelateParallelApply(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1 (a int not null, b int, key(a))") + tk.MustExec("create table t2 (a int not null, b int, key(a))") + tk.MustExec("insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5)") + tk.MustExec("insert into t2 values (1,10),(2,20),(3,30)") + + sql := "select * from t1 where b = 1 and a in (select a from t2)" + + // Enable correlate alternative + parallel apply. + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = ON") + tk.MustExec("set tidb_enable_parallel_apply = ON") + tk.MustExec("set tidb_executor_concurrency = 5") + + // Verify the plan contains Apply (correlate alternative won). + rows := tk.MustQuery("explain format = 'brief' " + sql).Rows() + require.True(t, explainContains(rows, "Apply"), + "with correlate alternative + parallel apply, expected Apply in plan:\n%s", joinExplainRows(rows)) + + // Verify EXPLAIN ANALYZE reports Concurrency > 1 for the Apply. + analyzeRows := tk.MustQuery("explain analyze " + sql).Rows() + foundConcurrency := false + for _, row := range analyzeRows { + line := fmt.Sprintf("%v", row) + if strings.Contains(line, "Apply") && strings.Contains(line, "Concurrency:") { + idx := strings.Index(line, "Concurrency:") + if idx >= 0 { + rest := line[idx+len("Concurrency:"):] + var n int + if _, err := fmt.Sscanf(rest, "%d", &n); err == nil && n > 1 { + foundConcurrency = true + } + } + break + } + } + require.True(t, foundConcurrency, + "EXPLAIN ANALYZE must report Concurrency > 1 for Apply when parallel_apply is on") + + // Verify correctness: parallel + correlate must match serial + no correlate. 
+ tk.MustExec("set tidb_enable_parallel_apply = OFF") + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = OFF") + serialRows := tk.MustQuery(sql).Rows() + + tk.MustExec("set tidb_enable_parallel_apply = ON") + tk.MustExec("set tidb_opt_enable_alternative_logical_plans = ON") + parallelRows := tk.MustQuery(sql).Rows() + + require.Equal(t, serialRows, parallelRows, + "correlate alternative + parallel apply must produce the same result as standard path") +} + // TestCorrelateWithCostFactors verifies that when hash/merge join cost factors // are increased, the correlate alternative round wins and produces Apply-based // plans with correlated index access for cases that normally choose HashJoin. diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 95c61d1f1da13..b523a442dcf28 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1084,7 +1084,10 @@ func (er *expressionRewriter) handleExistSubquery(ctx context.Context, planCtx * noDecorrelate := isNoDecorrelate(planCtx, corCols, hintFlags, handlingExistsSubquery) // When EnableCorrelateSubquery is ON (set by the correlate alternative round), // prevent decorrelation of correlated subqueries so they stay as Apply with index lookups. - if !noDecorrelate && len(corCols) > 0 { + // Skip when SEMI_JOIN_REWRITE() hint is present, since that hint explicitly requires + // decorrelation and would be silently ineffective on LogicalApply nodes. 
+ semiJoinRewriteHint := hintFlags&hint.HintFlagSemiJoinRewrite > 0 + if !noDecorrelate && len(corCols) > 0 && !semiJoinRewriteHint { b.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableAlternativeLogicalPlans) if b.ctx.GetSessionVars().EnableCorrelateSubquery { noDecorrelate = true @@ -1105,7 +1108,7 @@ func (er *expressionRewriter) handleExistSubquery(ctx context.Context, planCtx * } } np = er.popExistsSubPlan(planCtx, np) - semiJoinRewrite := hintFlags&hint.HintFlagSemiJoinRewrite > 0 + semiJoinRewrite := semiJoinRewriteHint if semiJoinRewrite && hintFlags&hint.HintFlagNoDecorrelate > 0 { b.ctx.GetSessionVars().StmtCtx.SetHintWarning( "NO_DECORRELATE() and SEMI_JOIN_REWRITE() are in conflict. Both will be ineffective.") diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index 9b817745c5b8b..096b0fdbf817c 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -16,6 +16,7 @@ package core import ( "context" + "fmt" "github.com/pingcap/tidb/pkg/expression" "github.com/pingcap/tidb/pkg/expression/aggregation" @@ -38,12 +39,12 @@ type CorrelateSolver struct{} func (s *CorrelateSolver) Optimize(ctx context.Context, p base.LogicalPlan) (retPlan base.LogicalPlan, retChanged bool, retErr error) { defer func() { if r := recover(); r != nil { - logutil.BgLogger().Warn("CorrelateSolver panic, returning original plan", + logutil.BgLogger().Warn("CorrelateSolver panic", zap.Any("recover", r), zap.Stack("stack")) - retPlan = p + retPlan = nil retChanged = false - retErr = nil + retErr = fmt.Errorf("CorrelateSolver panic: %v", r) } }() return s.correlate(ctx, p) diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index 3e9156db50304..54964919d2d22 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -48,6 +48,8 @@ import ( "github.com/pingcap/tidb/pkg/util/dbterror/plannererrors" "github.com/pingcap/tidb/pkg/util/hint" "github.com/pingcap/tidb/pkg/util/intest" + 
"github.com/pingcap/tidb/pkg/util/logutil" + "go.uber.org/zap" "github.com/pingcap/tidb/pkg/util/topsql" "github.com/pingcap/tidb/pkg/util/tracing" ) @@ -669,10 +671,19 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW return p, names, 0, nil } + // Pre-compute which rounds are enabled based on the signals from the first + // (default) build. This prevents signal leakage: alternative rounds rebuild + // the plan and may set AlternativeLogicalPlan* signals as a side effect, + // which are not reset by restoreLogicalPlanBuildCtx. Evaluating enabled() + // upfront ensures each round's eligibility is determined solely by the + // original build's signals. + enabledRounds := make([]alternativeRound, 0, len(alternativeRounds)) for _, round := range alternativeRounds { - if !round.enabled(sessVars) { - continue + if round.enabled(sessVars) { + enabledRounds = append(enabledRounds, round) } + } + for _, round := range enabledRounds { restoreLogicalPlanBuildCtx(sessVars, initialLogicalPlanCtx) failpoint.Inject("failIfAlternativeLogicalPlanRoundTriggered", func(val failpoint.Value) { if testSQL, ok := val.(string); ok && testSQL == node.Node.OriginalText() { @@ -706,7 +717,11 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW ) }() if err != nil { - return nil, nil, 0, err + // Alternative rounds are optional optimizations. If one fails, + // log and continue — the first round's plan is still valid. 
+ logutil.BgLogger().Warn("alternative logical plan round failed", + zap.Error(err)) + continue } if nonLogical { return p, names, 0, nil From ac97f9eb5166728a59472d4a3f442a04cc2f3b04 Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 7 Apr 2026 15:56:24 -0700 Subject: [PATCH 18/24] import reorder --- pkg/planner/optimize.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index 54964919d2d22..b9bebaa118c2e 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -49,9 +49,9 @@ import ( "github.com/pingcap/tidb/pkg/util/hint" "github.com/pingcap/tidb/pkg/util/intest" "github.com/pingcap/tidb/pkg/util/logutil" - "go.uber.org/zap" "github.com/pingcap/tidb/pkg/util/topsql" "github.com/pingcap/tidb/pkg/util/tracing" + "go.uber.org/zap" ) // getPlanFromNonPreparedPlanCache tries to get an available cached plan from the NonPrepared Plan Cache for this stmt. From e53692acf8e36604f0d66614bb826b1bee04f0ab Mon Sep 17 00:00:00 2001 From: tpp Date: Fri, 10 Apr 2026 08:40:10 -0700 Subject: [PATCH 19/24] review comments --- pkg/planner/core/expression_rewriter.go | 3 +++ pkg/planner/optimize.go | 20 ++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index b523a442dcf28..38e32c9a95a3c 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1299,6 +1299,9 @@ func (er *expressionRewriter) handleInSubquery(ctx context.Context, planCtx *exp // prevent decorrelation of correlated IN subqueries so they stay as Apply with index lookups. 
if !noDecorrelate && len(corCols) > 0 && !v.Not { planCtx.builder.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableAlternativeLogicalPlans) + if planCtx.builder.ctx.GetSessionVars().EnableAlternativeLogicalPlans { + planCtx.builder.ctx.GetSessionVars().StmtCtx.MarkAlternativeLogicalPlanPreferCorrelate() + } if planCtx.builder.ctx.GetSessionVars().EnableCorrelateSubquery { noDecorrelate = true } diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index b9bebaa118c2e..b7b0de698e454 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -594,12 +594,20 @@ var alternativeRounds = [...]alternativeRound{ adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagOrderAwareJoinReorder }, enabled: shouldTryOrderAwareReorderRound, }, - { - adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagCorrelate }, - enabled: shouldTryCorrelateRound, - setup: func(sv *variable.SessionVars) { sv.EnableCorrelateSubquery = true }, - cleanup: func(sv *variable.SessionVars) { sv.EnableCorrelateSubquery = false }, - }, + func() alternativeRound { + var old bool + return alternativeRound{ + adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagCorrelate }, + enabled: shouldTryCorrelateRound, + setup: func(sv *variable.SessionVars) { + old = sv.EnableCorrelateSubquery + sv.EnableCorrelateSubquery = true + }, + cleanup: func(sv *variable.SessionVars) { + sv.EnableCorrelateSubquery = old + }, + } + }(), } func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW, is infoschema.InfoSchema) (base.Plan, types.NameSlice, float64, error) { From 7d49f71cf3d319d244cfa5db2c23082826102b1c Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 11 Apr 2026 15:27:50 -0700 Subject: [PATCH 20/24] copilot review comments --- pkg/planner/core/rule_correlate.go | 5 +++++ pkg/planner/optimize.go | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index 
096b0fdbf817c..de9fff92b897e 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -447,6 +447,11 @@ func cloneTopN(tn *logicalop.LogicalTopN) (*logicalop.LogicalTopN, bool) { // fields from the source path (Index, StoreType, handle flags, hint flags). // Analysis fields (Ranges, AccessConds, IdxCols, etc.) are left at zero so // that fillIndexPath / deriveTablePathStats start from a clean state. +// +// Index-merge fields (PartialIndexPaths, PartialAlternativeIndexPaths, etc.) +// are intentionally omitted: AllPossibleAccessPaths contains only individual +// index paths; index merge paths are synthesized later by generateIndexMergePath +// which runs as part of DeriveStats after fillIndexPath populates these fresh paths. func freshAccessPath(src *util.AccessPath) *util.AccessPath { return &util.AccessPath{ Index: src.Index, diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index b7b0de698e454..4cc9fab42a3b8 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -579,6 +579,7 @@ func shouldTryCorrelateRound(sessVars *variable.SessionVars) bool { // enabled returns true when the round should be attempted. // setup/cleanup optionally modify session state before/after plan building. 
type alternativeRound struct { + name string adjustFlag func(uint64) uint64 enabled func(*variable.SessionVars) bool setup func(*variable.SessionVars) @@ -587,16 +588,19 @@ type alternativeRound struct { var alternativeRounds = [...]alternativeRound{ { + name: "non-decorrelate", adjustFlag: func(flag uint64) uint64 { return flag &^ rule.FlagDecorrelate }, enabled: shouldTryNonDecorrelationRound, }, { + name: "order-aware-reorder", adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagOrderAwareJoinReorder }, enabled: shouldTryOrderAwareReorderRound, }, func() alternativeRound { var old bool return alternativeRound{ + name: "correlate", adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagCorrelate }, enabled: shouldTryCorrelateRound, setup: func(sv *variable.SessionVars) { @@ -728,6 +732,7 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW // Alternative rounds are optional optimizations. If one fails, // log and continue — the first round's plan is still valid. 
logutil.BgLogger().Warn("alternative logical plan round failed", + zap.String("round", round.name), zap.Error(err)) continue } From 4c323381200d485f256f45566668a5888a9dc93e Mon Sep 17 00:00:00 2001 From: tpp Date: Fri, 17 Apr 2026 07:42:11 -0700 Subject: [PATCH 21/24] move clones per review --- pkg/planner/core/plan_clone_utils.go | 215 +++++++++++++++++++++++++++ pkg/planner/core/rule_correlate.go | 213 -------------------------- 2 files changed, 215 insertions(+), 213 deletions(-) diff --git a/pkg/planner/core/plan_clone_utils.go b/pkg/planner/core/plan_clone_utils.go index eec7eea729d15..2d97d11690bd8 100644 --- a/pkg/planner/core/plan_clone_utils.go +++ b/pkg/planner/core/plan_clone_utils.go @@ -16,8 +16,12 @@ package core import ( "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/expression/aggregation" "github.com/pingcap/tidb/pkg/planner/core/base" + "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" "github.com/pingcap/tidb/pkg/planner/core/operator/physicalop" + "github.com/pingcap/tidb/pkg/planner/property" + "github.com/pingcap/tidb/pkg/planner/util" "github.com/pingcap/tidb/pkg/planner/util/utilfuncp" "github.com/pingcap/tidb/pkg/types" ) @@ -69,3 +73,214 @@ func FastClonePointGetForPlanCache(newCtx base.PlanContext, src, dst *physicalop // cost, planCostInit, planCost, planCostVer2, accessCols return dst } + +// cloneLogicalSubtree creates a shallow clone of the logical plan subtree, +// ensuring each node has a fresh plan ID and independent mutable state (children, +// conditions, AllConds). Immutable data (table info, column info, etc.) is shared. +// This is used to build the Apply alternative's inner plan without modifying the +// Join's original inner subtree when PPD pushes correlated conditions down. +// Returns (clone, true) on success, or (nil, false) if an unhandled operator type +// is encountered. 
In the failure case, the caller must abort the correlate +// optimization to avoid corrupting the original subtree. +func cloneLogicalSubtree(p base.LogicalPlan) (base.LogicalPlan, bool) { + switch op := p.(type) { + case *logicalop.DataSource: + return cloneDataSource(op), true + case *logicalop.LogicalJoin: + return cloneJoin(op) + case *logicalop.LogicalSelection: + return cloneSelection(op) + case *logicalop.LogicalProjection: + return cloneProjection(op) + case *logicalop.LogicalAggregation: + return cloneAggregation(op) + case *logicalop.LogicalLimit: + return cloneLimit(op) + case *logicalop.LogicalSort: + return cloneSort(op) + case *logicalop.LogicalTopN: + return cloneTopN(op) + default: + // Unknown operator type — cannot safely clone. Return failure + // so the caller aborts the correlate optimization. + return nil, false + } +} + +func cloneWithChildren(p base.LogicalPlan) ([]base.LogicalPlan, bool) { + children := make([]base.LogicalPlan, len(p.Children())) + for i, child := range p.Children() { + cloned, ok := cloneLogicalSubtree(child) + if !ok { + return nil, false + } + children[i] = cloned + } + return children, true +} + +func cloneDataSource(ds *logicalop.DataSource) *logicalop.DataSource { + clone := *ds + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + ds.SCtx(), ds.TP(), &clone, ds.QueryBlockOffset()) + clone.SetSchema(ds.Schema().Clone()) + // Independent slices that PPD replaces. + clone.AllConds = append([]expression.Expression(nil), ds.AllConds...) + clone.PushedDownConds = append([]expression.Expression(nil), ds.PushedDownConds...) + // Deep-clone AccessPaths so the Join and Apply alternatives have fully + // independent path objects. Stats derivation (fillIndexPath, etc.) mutates + // AccessPath fields in place; without deep cloning, costing one alternative + // can corrupt the other and destabilize CBO. 
+ clone.AllPossibleAccessPaths = make([]*util.AccessPath, len(ds.AllPossibleAccessPaths)) + for i, ap := range ds.AllPossibleAccessPaths { + clone.AllPossibleAccessPaths[i] = ap.Clone() + } + clone.PossibleAccessPaths = make([]*util.AccessPath, len(ds.PossibleAccessPaths)) + for i, ap := range ds.PossibleAccessPaths { + clone.PossibleAccessPaths[i] = ap.Clone() + } + // Preserve original stats so DeriveStats returns early for DataSources + // that don't receive correlated conditions. Without this, DeriveStats + // re-runs fillIndexPath on all DataSources, which fails when conditions + // reference columns that column pruning removed from the schema. + if origStats := ds.StatsInfo(); origStats != nil { + clone.SetStats(origStats) + } + return &clone +} + +func cloneJoin(j *logicalop.LogicalJoin) (*logicalop.LogicalJoin, bool) { + children, ok := cloneWithChildren(j) + if !ok { + return nil, false + } + clone := *j + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + j.SCtx(), j.TP(), &clone, j.QueryBlockOffset()) + clone.SetSchema(j.Schema().Clone()) + // Independent condition slices that PPD may modify. + clone.EqualConditions = append([]*expression.ScalarFunction(nil), j.EqualConditions...) + clone.LeftConditions = append(expression.CNFExprs(nil), j.LeftConditions...) + clone.RightConditions = append(expression.CNFExprs(nil), j.RightConditions...) + clone.OtherConditions = append(expression.CNFExprs(nil), j.OtherConditions...) + // Clear PreferCorrelate on cloned inner joins to prevent CorrelateSolver + // from processing nested semi-joins in the cloned subtree. + clone.PreferCorrelate = false + clone.SetChildren(children...) 
+ return &clone, true +} + +func cloneSelection(s *logicalop.LogicalSelection) (*logicalop.LogicalSelection, bool) { + children, ok := cloneWithChildren(s) + if !ok { + return nil, false + } + clone := *s + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + s.SCtx(), s.TP(), &clone, s.QueryBlockOffset()) + clone.Conditions = append(expression.CNFExprs(nil), s.Conditions...) + clone.SetChildren(children...) + return &clone, true +} + +func cloneProjection(proj *logicalop.LogicalProjection) (*logicalop.LogicalProjection, bool) { + children, ok := cloneWithChildren(proj) + if !ok { + return nil, false + } + clone := *proj + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + proj.SCtx(), proj.TP(), &clone, proj.QueryBlockOffset()) + clone.SetSchema(proj.Schema().Clone()) + clone.Exprs = append([]expression.Expression(nil), proj.Exprs...) + clone.SetChildren(children...) + return &clone, true +} + +func cloneAggregation(agg *logicalop.LogicalAggregation) (*logicalop.LogicalAggregation, bool) { + children, ok := cloneWithChildren(agg) + if !ok { + return nil, false + } + clone := *agg + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + agg.SCtx(), agg.TP(), &clone, agg.QueryBlockOffset()) + clone.SetSchema(agg.Schema().Clone()) + clone.AggFuncs = append([]*aggregation.AggFuncDesc(nil), agg.AggFuncs...) + clone.GroupByItems = append([]expression.Expression(nil), agg.GroupByItems...) + clone.SetChildren(children...) + return &clone, true +} + +func cloneLimit(lim *logicalop.LogicalLimit) (*logicalop.LogicalLimit, bool) { + children, ok := cloneWithChildren(lim) + if !ok { + return nil, false + } + clone := *lim + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + lim.SCtx(), lim.TP(), &clone, lim.QueryBlockOffset()) + clone.SetSchema(lim.Schema().Clone()) + if len(lim.PartitionBy) > 0 { + clone.PartitionBy = append([]property.SortItem(nil), lim.PartitionBy...) + } + clone.SetChildren(children...) 
+ return &clone, true +} + +func cloneSort(s *logicalop.LogicalSort) (*logicalop.LogicalSort, bool) { + children, ok := cloneWithChildren(s) + if !ok { + return nil, false + } + clone := *s + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + s.SCtx(), s.TP(), &clone, s.QueryBlockOffset()) + // LogicalSort embeds BaseLogicalPlan (not LogicalSchemaProducer), + // so it inherits schema from its child — no SetSchema needed. + clone.ByItems = append([]*util.ByItems(nil), s.ByItems...) + clone.SetChildren(children...) + return &clone, true +} + +func cloneTopN(tn *logicalop.LogicalTopN) (*logicalop.LogicalTopN, bool) { + children, ok := cloneWithChildren(tn) + if !ok { + return nil, false + } + clone := *tn + clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( + tn.SCtx(), tn.TP(), &clone, tn.QueryBlockOffset()) + clone.SetSchema(tn.Schema().Clone()) + clone.ByItems = append([]*util.ByItems(nil), tn.ByItems...) + if len(tn.PartitionBy) > 0 { + clone.PartitionBy = append([]property.SortItem(nil), tn.PartitionBy...) + } + clone.SetChildren(children...) + return &clone, true +} + +// freshAccessPath creates a new AccessPath with only the structural identity +// fields from the source path (Index, StoreType, handle flags, hint flags). +// Analysis fields (Ranges, AccessConds, IdxCols, etc.) are left at zero so +// that fillIndexPath / deriveTablePathStats start from a clean state. +// +// Index-merge fields (PartialIndexPaths, PartialAlternativeIndexPaths, etc.) +// are intentionally omitted: AllPossibleAccessPaths contains only individual +// index paths; index merge paths are synthesized later by generateIndexMergePath +// which runs as part of DeriveStats after fillIndexPath populates these fresh paths. 
+func freshAccessPath(src *util.AccessPath) *util.AccessPath { + return &util.AccessPath{ + Index: src.Index, + StoreType: src.StoreType, + IsIntHandlePath: src.IsIntHandlePath, + IsCommonHandlePath: src.IsCommonHandlePath, + Forced: src.Forced, + ForceKeepOrder: src.ForceKeepOrder, + ForceNoKeepOrder: src.ForceNoKeepOrder, + ForcePartialOrder: src.ForcePartialOrder, + IsUkShardIndexPath: src.IsUkShardIndexPath, + IndexLookUpPushDownBy: src.IndexLookUpPushDownBy, + NoncacheableReason: src.NoncacheableReason, + } +} diff --git a/pkg/planner/core/rule_correlate.go b/pkg/planner/core/rule_correlate.go index de9fff92b897e..ac7d6e99785ad 100644 --- a/pkg/planner/core/rule_correlate.go +++ b/pkg/planner/core/rule_correlate.go @@ -19,11 +19,9 @@ import ( "fmt" "github.com/pingcap/tidb/pkg/expression" - "github.com/pingcap/tidb/pkg/expression/aggregation" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/planner/core/base" "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" - "github.com/pingcap/tidb/pkg/planner/property" "github.com/pingcap/tidb/pkg/planner/util" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/logutil" @@ -257,217 +255,6 @@ func (*CorrelateSolver) buildCorrelatedCond( return cond, corCol } -// cloneLogicalSubtree creates a shallow clone of the logical plan subtree, -// ensuring each node has a fresh plan ID and independent mutable state (children, -// conditions, AllConds). Immutable data (table info, column info, etc.) is shared. -// This is used to build the Apply alternative's inner plan without modifying the -// Join's original inner subtree when PPD pushes correlated conditions down. -// Returns (clone, true) on success, or (nil, false) if an unhandled operator type -// is encountered. In the failure case, the caller must abort the correlate -// optimization to avoid corrupting the original subtree. 
-func cloneLogicalSubtree(p base.LogicalPlan) (base.LogicalPlan, bool) { - switch op := p.(type) { - case *logicalop.DataSource: - return cloneDataSource(op), true - case *logicalop.LogicalJoin: - return cloneJoin(op) - case *logicalop.LogicalSelection: - return cloneSelection(op) - case *logicalop.LogicalProjection: - return cloneProjection(op) - case *logicalop.LogicalAggregation: - return cloneAggregation(op) - case *logicalop.LogicalLimit: - return cloneLimit(op) - case *logicalop.LogicalSort: - return cloneSort(op) - case *logicalop.LogicalTopN: - return cloneTopN(op) - default: - // Unknown operator type — cannot safely clone. Return failure - // so the caller aborts the correlate optimization. - return nil, false - } -} - -func cloneWithChildren(p base.LogicalPlan) ([]base.LogicalPlan, bool) { - children := make([]base.LogicalPlan, len(p.Children())) - for i, child := range p.Children() { - cloned, ok := cloneLogicalSubtree(child) - if !ok { - return nil, false - } - children[i] = cloned - } - return children, true -} - -func cloneDataSource(ds *logicalop.DataSource) *logicalop.DataSource { - clone := *ds - clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( - ds.SCtx(), ds.TP(), &clone, ds.QueryBlockOffset()) - clone.SetSchema(ds.Schema().Clone()) - // Independent slices that PPD replaces. - clone.AllConds = append([]expression.Expression(nil), ds.AllConds...) - clone.PushedDownConds = append([]expression.Expression(nil), ds.PushedDownConds...) - // Deep-clone AccessPaths so the Join and Apply alternatives have fully - // independent path objects. Stats derivation (fillIndexPath, etc.) mutates - // AccessPath fields in place; without deep cloning, costing one alternative - // can corrupt the other and destabilize CBO. 
- clone.AllPossibleAccessPaths = make([]*util.AccessPath, len(ds.AllPossibleAccessPaths)) - for i, ap := range ds.AllPossibleAccessPaths { - clone.AllPossibleAccessPaths[i] = ap.Clone() - } - clone.PossibleAccessPaths = make([]*util.AccessPath, len(ds.PossibleAccessPaths)) - for i, ap := range ds.PossibleAccessPaths { - clone.PossibleAccessPaths[i] = ap.Clone() - } - // Preserve original stats so DeriveStats returns early for DataSources - // that don't receive correlated conditions. Without this, DeriveStats - // re-runs fillIndexPath on all DataSources, which fails when conditions - // reference columns that column pruning removed from the schema. - if origStats := ds.StatsInfo(); origStats != nil { - clone.SetStats(origStats) - } - return &clone -} - -func cloneJoin(j *logicalop.LogicalJoin) (*logicalop.LogicalJoin, bool) { - children, ok := cloneWithChildren(j) - if !ok { - return nil, false - } - clone := *j - clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( - j.SCtx(), j.TP(), &clone, j.QueryBlockOffset()) - clone.SetSchema(j.Schema().Clone()) - // Independent condition slices that PPD may modify. - clone.EqualConditions = append([]*expression.ScalarFunction(nil), j.EqualConditions...) - clone.LeftConditions = append(expression.CNFExprs(nil), j.LeftConditions...) - clone.RightConditions = append(expression.CNFExprs(nil), j.RightConditions...) - clone.OtherConditions = append(expression.CNFExprs(nil), j.OtherConditions...) - // Clear PreferCorrelate on cloned inner joins to prevent CorrelateSolver - // from processing nested semi-joins in the cloned subtree. - clone.PreferCorrelate = false - clone.SetChildren(children...) 
- return &clone, true -} - -func cloneSelection(s *logicalop.LogicalSelection) (*logicalop.LogicalSelection, bool) { - children, ok := cloneWithChildren(s) - if !ok { - return nil, false - } - clone := *s - clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( - s.SCtx(), s.TP(), &clone, s.QueryBlockOffset()) - clone.Conditions = append(expression.CNFExprs(nil), s.Conditions...) - clone.SetChildren(children...) - return &clone, true -} - -func cloneProjection(proj *logicalop.LogicalProjection) (*logicalop.LogicalProjection, bool) { - children, ok := cloneWithChildren(proj) - if !ok { - return nil, false - } - clone := *proj - clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( - proj.SCtx(), proj.TP(), &clone, proj.QueryBlockOffset()) - clone.SetSchema(proj.Schema().Clone()) - clone.Exprs = append([]expression.Expression(nil), proj.Exprs...) - clone.SetChildren(children...) - return &clone, true -} - -func cloneAggregation(agg *logicalop.LogicalAggregation) (*logicalop.LogicalAggregation, bool) { - children, ok := cloneWithChildren(agg) - if !ok { - return nil, false - } - clone := *agg - clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( - agg.SCtx(), agg.TP(), &clone, agg.QueryBlockOffset()) - clone.SetSchema(agg.Schema().Clone()) - clone.AggFuncs = append([]*aggregation.AggFuncDesc(nil), agg.AggFuncs...) - clone.GroupByItems = append([]expression.Expression(nil), agg.GroupByItems...) - clone.SetChildren(children...) - return &clone, true -} - -func cloneLimit(lim *logicalop.LogicalLimit) (*logicalop.LogicalLimit, bool) { - children, ok := cloneWithChildren(lim) - if !ok { - return nil, false - } - clone := *lim - clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( - lim.SCtx(), lim.TP(), &clone, lim.QueryBlockOffset()) - clone.SetSchema(lim.Schema().Clone()) - if len(lim.PartitionBy) > 0 { - clone.PartitionBy = append([]property.SortItem(nil), lim.PartitionBy...) - } - clone.SetChildren(children...) 
- return &clone, true -} - -func cloneSort(s *logicalop.LogicalSort) (*logicalop.LogicalSort, bool) { - children, ok := cloneWithChildren(s) - if !ok { - return nil, false - } - clone := *s - clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( - s.SCtx(), s.TP(), &clone, s.QueryBlockOffset()) - // LogicalSort embeds BaseLogicalPlan (not LogicalSchemaProducer), - // so it inherits schema from its child — no SetSchema needed. - clone.ByItems = append([]*util.ByItems(nil), s.ByItems...) - clone.SetChildren(children...) - return &clone, true -} - -func cloneTopN(tn *logicalop.LogicalTopN) (*logicalop.LogicalTopN, bool) { - children, ok := cloneWithChildren(tn) - if !ok { - return nil, false - } - clone := *tn - clone.BaseLogicalPlan = logicalop.NewBaseLogicalPlan( - tn.SCtx(), tn.TP(), &clone, tn.QueryBlockOffset()) - clone.SetSchema(tn.Schema().Clone()) - clone.ByItems = append([]*util.ByItems(nil), tn.ByItems...) - if len(tn.PartitionBy) > 0 { - clone.PartitionBy = append([]property.SortItem(nil), tn.PartitionBy...) - } - clone.SetChildren(children...) - return &clone, true -} - -// freshAccessPath creates a new AccessPath with only the structural identity -// fields from the source path (Index, StoreType, handle flags, hint flags). -// Analysis fields (Ranges, AccessConds, IdxCols, etc.) are left at zero so -// that fillIndexPath / deriveTablePathStats start from a clean state. -// -// Index-merge fields (PartialIndexPaths, PartialAlternativeIndexPaths, etc.) -// are intentionally omitted: AllPossibleAccessPaths contains only individual -// index paths; index merge paths are synthesized later by generateIndexMergePath -// which runs as part of DeriveStats after fillIndexPath populates these fresh paths. 
-func freshAccessPath(src *util.AccessPath) *util.AccessPath { - return &util.AccessPath{ - Index: src.Index, - StoreType: src.StoreType, - IsIntHandlePath: src.IsIntHandlePath, - IsCommonHandlePath: src.IsCommonHandlePath, - Forced: src.Forced, - ForceKeepOrder: src.ForceKeepOrder, - ForceNoKeepOrder: src.ForceNoKeepOrder, - ForcePartialOrder: src.ForcePartialOrder, - IsUkShardIndexPath: src.IsUkShardIndexPath, - IndexLookUpPushDownBy: src.IndexLookUpPushDownBy, - NoncacheableReason: src.NoncacheableReason, - } -} - // liftDataSourceConds walks the plan tree and for each DataSource with // non-empty AllConds, wraps it in a Selection node containing those conditions. // This "un-pushes" conditions that the original PPD pushed into DataSources, From dd0f84f638d2039ac7acc2047e36b6be958903c2 Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 21 Apr 2026 08:54:40 +0530 Subject: [PATCH 22/24] review comments2 --- pkg/planner/core/optimizer.go | 9 +++------ pkg/planner/core/optimizer_test.go | 2 +- pkg/planner/core/rule/logical_rules.go | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pkg/planner/core/optimizer.go b/pkg/planner/core/optimizer.go index d0768f9658142..aa061de39f5ed 100644 --- a/pkg/planner/core/optimizer.go +++ b/pkg/planner/core/optimizer.go @@ -107,12 +107,12 @@ var optRuleList = []base.LogicalOptRule{ &rule.SyncWaitStatsLoadPoint{}, &JoinReOrderSolver{}, &rule.OuterJoinToSemiJoin{}, + &CorrelateSolver{}, &rule.ColumnPruner{}, // column pruning again at last, note it will mess up the results of buildKeySolver &PushDownSequenceSolver{}, &EliminateUnionAllDualItem{}, &EmptySelectionEliminator{}, &ResolveExpand{}, - &CorrelateSolver{}, } // Interaction Rule List @@ -364,12 +364,9 @@ func adjustOptimizationFlags(flag uint64, logic base.LogicalPlan) uint64 { } // FlagCorrelate is added by the correlate alternative round's flag adjuster, // not here. EnableCorrelateSubquery is an internal flag toggled by the round. 
- // Recompute FlagPruneColumnsAgain after all conditional flag mutations so - // that conditionally-added flags (FlagCorrelate, FlagPartitionProcessor, …) - // are taken into account. A second column-prune pass is worthwhile when - // any rule above column pruning is enabled. + // A second column-prune pass is worthwhile when any rule above column + // pruning is enabled. if flag&rule.FlagPruneColumns != 0 { - // Mask of all flag bits strictly above FlagPruneColumns. const abovePruneColumns = ^(rule.FlagPruneColumns | (rule.FlagPruneColumns - 1)) if flag&abovePruneColumns != 0 { flag |= rule.FlagPruneColumnsAgain diff --git a/pkg/planner/core/optimizer_test.go b/pkg/planner/core/optimizer_test.go index 36a2891d5e2ac..a0917696839fc 100644 --- a/pkg/planner/core/optimizer_test.go +++ b/pkg/planner/core/optimizer_test.go @@ -620,7 +620,7 @@ func TestOptRuleListFlagAlignment(t *testing.T) { // added/removed without updating the other. // // bits.Len64(lastFlag) == bit-position + 1 == expected list length. - numFlags := bits.Len64(rule.FlagCorrelate) + numFlags := bits.Len64(rule.FlagResolveExpand) require.Equalf(t, numFlags, len(optRuleList), "optRuleList length (%d) does not match Flag* count (%d); "+ "did you add a rule without a flag or vice versa? 
"+ diff --git a/pkg/planner/core/rule/logical_rules.go b/pkg/planner/core/rule/logical_rules.go index 19209bab6208d..63a27bcef8944 100644 --- a/pkg/planner/core/rule/logical_rules.go +++ b/pkg/planner/core/rule/logical_rules.go @@ -42,12 +42,12 @@ const ( FlagSyncWaitStatsLoadPoint FlagJoinReOrder FlagOuterJoinToSemiJoin + FlagCorrelate FlagPruneColumnsAgain FlagPushDownSequence FlagEliminateUnionAllDualItem FlagEmptySelectionEliminator FlagResolveExpand - FlagCorrelate ) func setPredicatePushDownFlag(u uint64) uint64 { From 448665b4ec51580f35e930801fbabf0e6bef0ba5 Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 21 Apr 2026 09:11:20 +0530 Subject: [PATCH 23/24] review comments3 --- pkg/planner/optimize.go | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index 945284eaab889..3e5463b430030 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -603,6 +603,11 @@ type alternativeRound struct { cleanup func(*variable.SessionVars) } +// savedEnableCorrelateSubquery holds the pre-round value of +// EnableCorrelateSubquery so setup/cleanup can share it without a closure +// wrapper. NOTE(review): this is package-level state shared by every session; optimize being single-threaded per session does not by itself rule out concurrent sessions overwriting each other's saved value - confirm before relying on it. 
+var savedEnableCorrelateSubquery bool + var alternativeRounds = [...]alternativeRound{ { name: "non-decorrelate", @@ -614,21 +619,18 @@ var alternativeRounds = [...]alternativeRound{ adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagOrderAwareJoinReorder }, enabled: shouldTryOrderAwareReorderRound, }, - func() alternativeRound { - var old bool - return alternativeRound{ - name: "correlate", - adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagCorrelate }, - enabled: shouldTryCorrelateRound, - setup: func(sv *variable.SessionVars) { - old = sv.EnableCorrelateSubquery - sv.EnableCorrelateSubquery = true - }, - cleanup: func(sv *variable.SessionVars) { - sv.EnableCorrelateSubquery = old - }, - } - }(), + { + name: "correlate", + adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagCorrelate }, + enabled: shouldTryCorrelateRound, + setup: func(sv *variable.SessionVars) { + savedEnableCorrelateSubquery = sv.EnableCorrelateSubquery + sv.EnableCorrelateSubquery = true + }, + cleanup: func(sv *variable.SessionVars) { + sv.EnableCorrelateSubquery = savedEnableCorrelateSubquery + }, + }, } func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW, is infoschema.InfoSchema) (base.Plan, types.NameSlice, float64, error) { From be086e891c781b6bb4797e5fba969ff6e5fc973f Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 21 Apr 2026 14:25:03 +0530 Subject: [PATCH 24/24] update bazel --- pkg/planner/core/casetest/rule/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/planner/core/casetest/rule/BUILD.bazel b/pkg/planner/core/casetest/rule/BUILD.bazel index f1fc117d0b206..a7a33d71ab880 100644 --- a/pkg/planner/core/casetest/rule/BUILD.bazel +++ b/pkg/planner/core/casetest/rule/BUILD.bazel @@ -21,7 +21,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 35, + shard_count = 23, deps = [ "//pkg/config", "//pkg/domain",