diff --git a/pkg/planner/core/BUILD.bazel b/pkg/planner/core/BUILD.bazel index f977988f2a680..42177233f82e8 100644 --- a/pkg/planner/core/BUILD.bazel +++ b/pkg/planner/core/BUILD.bazel @@ -217,6 +217,7 @@ go_test( "find_best_task_test.go", "hint_test.go", "integration_test.go", + "join_reorder_side_effect_test.go", "lateral_join_test.go", "logical_plans_test.go", "main_test.go", diff --git a/pkg/planner/core/join_reorder_side_effect_test.go b/pkg/planner/core/join_reorder_side_effect_test.go new file mode 100644 index 0000000000000..e8ad7a3632bb2 --- /dev/null +++ b/pkg/planner/core/join_reorder_side_effect_test.go @@ -0,0 +1,115 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/domain" + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/planner/core/base" + "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" + "github.com/pingcap/tidb/pkg/planner/property" + "github.com/pingcap/tidb/pkg/planner/util/coretestsdk" + "github.com/pingcap/tidb/pkg/types" + "github.com/stretchr/testify/require" +) + +func newLegacyTypedLeaf(ctx base.PlanContext, tp byte, count float64) *logicalop.LogicalTableDual { + dual := logicalop.LogicalTableDual{RowCount: 1}.Init(ctx, 0) + dual.SetSchema(expression.NewSchema()) + dual.Schema().Append(&expression.Column{ + UniqueID: ctx.GetSessionVars().PlanColumnID.Add(1), + RetType: types.NewFieldType(tp), + }) + dual.SetStats(&property.StatsInfo{RowCount: count}) + return dual +} + +func TestGreedyConnectivityProbeKeepsProjectionLeafImmutable(t *testing.T) { + ctx := coretestsdk.MockContext() + defer domain.GetDomain(ctx).StatsHandle().Close() + + stringLeaf := newLegacyTypedLeaf(ctx, mysql.TypeVarchar, 1) + intLeaf := newLegacyTypedLeaf(ctx, mysql.TypeLonglong, 1) + + // Simulate a projected join key that originally participated in a col=col + // equality edge. When the legacy greedy connectivity probe flips the pair + // direction, the rebuilt equality injects helper expressions. The probe must + // not mutate the original projection leaf in place. + projectedKey := &expression.Column{ + UniqueID: ctx.GetSessionVars().PlanColumnID.Add(1), + RetType: types.NewFieldType(mysql.TypeLonglong), + } + projectionLeaf := logicalop.LogicalProjection{ + Exprs: []expression.Expression{stringLeaf.Schema().Columns[0]}, + }.Init(ctx, 0) + projectionLeaf.SetSchema(expression.NewSchema(projectedKey)) + projectionLeaf.SetChildren(stringLeaf) + projectionLeaf.SetStats(&property.StatsInfo{RowCount: stringLeaf.StatsInfo().RowCount}) + + eqCond, ok := expression.NewFunctionInternal(ctx.GetExprCtx(), ast.EQ, types.NewFieldType(mysql.TypeTiny), + projectedKey, intLeaf.Schema().Columns[0]).(*expression.ScalarFunction) + require.True(t, ok) + + // Reversing the join side now requires casts, which exercises the helper + // projection path during speculative join construction. + projectedKey.RetType = types.NewFieldType(mysql.TypeVarchar) + + solver := &joinReorderGreedySolver{ + allInnerJoin: true, + baseSingleGroupJoinOrderSolver: &baseSingleGroupJoinOrderSolver{ + ctx: ctx, + basicJoinGroupInfo: &basicJoinGroupInfo{ + eqEdges: []*expression.ScalarFunction{eqCond}, + joinTypes: []*joinTypeWithExtMsg{{ + JoinType: base.InnerJoin, + }}, + }, + }, + } + + origExprCnt := len(projectionLeaf.Exprs) + origSchemaLen := projectionLeaf.Schema().Len() + probe := solver.probeConnection(intLeaf, projectionLeaf) + require.True(t, probe.HasEQEdge()) + require.True(t, probe.HasJoinCondition()) + require.False(t, probe.IsCartesian()) + require.Equal(t, base.InnerJoin, probe.joinType.JoinType) + require.Same(t, intLeaf, probe.leftPlan) + require.Same(t, projectionLeaf, probe.rightPlan) + require.Len(t, projectionLeaf.Exprs, origExprCnt) + require.Equal(t, origSchemaLen, projectionLeaf.Schema().Len()) + + join, remainOtherConds, err := solver.buildJoinFromProbe(probe) + require.NoError(t, err) + require.Empty(t, remainOtherConds) + _, _, err = join.RecursiveDeriveStats(nil) + require.NoError(t, err) + logicalJoin, ok := join.(*logicalop.LogicalJoin) + require.True(t, ok) + clonedRight, ok := logicalJoin.Children()[1].(*logicalop.LogicalProjection) + require.True(t, ok) + require.NotSame(t, projectionLeaf, clonedRight) + require.Len(t, clonedRight.Exprs, origExprCnt+1) + require.Equal(t, origSchemaLen+1, clonedRight.Schema().Len()) + require.NotNil(t, clonedRight.StatsInfo()) + _, ok = clonedRight.StatsInfo().ColNDVs[clonedRight.Schema().Columns[origSchemaLen].UniqueID] + require.True(t, ok) + require.Len(t, projectionLeaf.Exprs, origExprCnt) + require.Equal(t, origSchemaLen, projectionLeaf.Schema().Len()) +} diff --git a/pkg/planner/core/joinorder/BUILD.bazel b/pkg/planner/core/joinorder/BUILD.bazel index 696cf206dade3..f247d863c56d6 100644 --- a/pkg/planner/core/joinorder/BUILD.bazel +++ b/pkg/planner/core/joinorder/BUILD.bazel @@ -31,11 +31,25 @@ go_library( go_test( name = "joinorder_test", timeout = "short", - srcs = ["bitset_bench_test.go"], + srcs = [ + "bitset_bench_test.go", + "join_order_side_effect_test.go", + ], embed = [":joinorder"], flaky = True, + shard_count = 2, deps = [ + "//pkg/domain", + "//pkg/expression", + "//pkg/parser/ast", + "//pkg/parser/mysql", + "//pkg/planner/core/base", + "//pkg/planner/core/operator/logicalop", + "//pkg/planner/property", + "//pkg/planner/util/coretestsdk", + "//pkg/types", "//pkg/util/intset", "@com_github_bits_and_blooms_bitset//:bitset", + "@com_github_stretchr_testify//require", ], ) diff --git a/pkg/planner/core/joinorder/conflict_detector.go b/pkg/planner/core/joinorder/conflict_detector.go index b53e0a51fb8b6..71fa364c63887 100644 --- a/pkg/planner/core/joinorder/conflict_detector.go +++ b/pkg/planner/core/joinorder/conflict_detector.go @@ -578,6 +578,55 @@ func (r *CheckConnectionResult) NoEQEdge() bool { return !r.hasEQCond } +// HasJoinCondition reports whether the connection is backed by at least one +// real join predicate instead of a pure cartesian edge. +func (r *CheckConnectionResult) HasJoinCondition() bool { + if r == nil || r.node1 == nil || r.node2 == nil { + return false + } + if r.edgeHasJoinCondition(r.appliedNonInnerEdge) { + return true + } + for _, e := range r.appliedInnerEdges { + if r.edgeHasJoinCondition(e) { + return true + } + } + return false +} + +func (r *CheckConnectionResult) edgeHasJoinCondition(e *edge) bool { + if e == nil { + return false + } + if len(e.eqConds) > 0 { + return true + } + // Degenerate one-sided predicates still form edges so CD-C can preserve + // correctness constraints, but seed-by-cost should only treat predicates + // that reference both candidate sides as real join conditions. + for _, cond := range e.nonEQConds { + if ExprConnectsBothSides(cond, r.node1.p.Schema(), r.node2.p.Schema()) { + return true + } + } + return false +} + +// ExprConnectsBothSides reports whether cond references both the left and right +// schemas instead of being a one-sided predicate. +func ExprConnectsBothSides(cond expression.Expression, leftSchema, rightSchema *expression.Schema) bool { + if cond == nil || leftSchema == nil || rightSchema == nil { + return false + } + mergedSchema := expression.MergeSchema(leftSchema, rightSchema) + if !expression.ExprFromSchema(cond, mergedSchema) { + return false + } + return !expression.ExprFromSchema(cond, leftSchema) && + !expression.ExprFromSchema(cond, rightSchema) +} + // CheckConnection tests whether any edge can validly connect node1 and node2. // It's corresponding to the pseudocode for APPLICABLE(b/c) in the paper(Figure-9). // The basic idea is: It collects all applicable inner edges (there can be many) and at most one @@ -724,7 +773,10 @@ func (d *ConflictDetector) MakeJoin(checkResult *CheckConnectionResult, vertexHi }, nil } -func alignEQConds(ctx base.PlanContext, left, right base.LogicalPlan, eqConds []*expression.ScalarFunction) (newLeft base.LogicalPlan, newRight base.LogicalPlan, alignedEQConds []*expression.ScalarFunction, err error) { +// AlignEQCondsWithoutMutation aligns eqConds to the provided left/right plans +// and injects helper projections on temporary plan clones when type coercion +// makes a swapped key cease to be a plain column reference. +func AlignEQCondsWithoutMutation(ctx base.PlanContext, left, right base.LogicalPlan, eqConds []*expression.ScalarFunction) (newLeft base.LogicalPlan, newRight base.LogicalPlan, alignedEQConds []*expression.ScalarFunction, err error) { if len(eqConds) == 0 { return left, right, nil, nil } @@ -749,10 +801,10 @@ func alignEQConds(ctx base.PlanContext, left, right base.LogicalPlan, eqConds [] lCol := swapped.GetArgs()[0] rCol := swapped.GetArgs()[1] if !isCol0 { - left, lCol = logicalop.InjectExpr(left, swapped.GetArgs()[0]) + left, lCol = logicalop.InjectExprAvoidingMutation(left, swapped.GetArgs()[0]) } if !isCol1 { - right, rCol = logicalop.InjectExpr(right, swapped.GetArgs()[1]) + right, rCol = logicalop.InjectExprAvoidingMutation(right, swapped.GetArgs()[1]) } swapped = expression.NewFunctionInternal(ctx.GetExprCtx(), cond.FuncName.L, cond.GetStaticType(), lCol, rCol).(*expression.ScalarFunction) @@ -770,14 +822,14 @@ func makeNonInnerJoin(ctx base.PlanContext, checkResult *CheckConnectionResult, var alignedEQConds []*expression.ScalarFunction var err error - checkResult.node1.p, checkResult.node2.p, alignedEQConds, err = alignEQConds(ctx, checkResult.node1.p, checkResult.node2.p, e.eqConds) + // Keep aligned helper projections local to this join construction. During + // seed scoring, the caller may discard this candidate and continue probing + // other pairs, so mutating checkResult.node{1,2}.p here would leak state. + left, right, alignedEQConds, err := AlignEQCondsWithoutMutation(ctx, checkResult.node1.p, checkResult.node2.p, e.eqConds) if err != nil { return nil, err } - left := checkResult.node1.p - right := checkResult.node2.p - join, err := newCartesianJoin(ctx, e.joinType, left, right, vertexHints) if err != nil { return nil, err @@ -899,15 +951,20 @@ func makeInnerJoin(ctx base.PlanContext, checkResult *CheckConnectionResult, exi var alignedEQConds []*expression.ScalarFunction newEqConds := make([]*expression.ScalarFunction, 0, 8) newOtherConds := make([]expression.Expression, 0, 8) + // Reuse the locally aligned plans across all edges of this candidate join, + // but do not write them back into checkResult.node{1,2}.p for the same + // reason as makeNonInnerJoin() above. + left := checkResult.node1.p + right := checkResult.node2.p for _, e := range checkResult.appliedInnerEdges { - checkResult.node1.p, checkResult.node2.p, alignedEQConds, err = alignEQConds(ctx, checkResult.node1.p, checkResult.node2.p, e.eqConds) + left, right, alignedEQConds, err = AlignEQCondsWithoutMutation(ctx, left, right, e.eqConds) if err != nil { return nil, err } newEqConds = append(newEqConds, alignedEQConds...) newOtherConds = append(newOtherConds, e.nonEQConds...) } - join, err := newCartesianJoin(ctx, checkResult.appliedInnerEdges[0].joinType, checkResult.node1.p, checkResult.node2.p, vertexHints) + join, err := newCartesianJoin(ctx, checkResult.appliedInnerEdges[0].joinType, left, right, vertexHints) if err != nil { return nil, err } diff --git a/pkg/planner/core/joinorder/join_order.go b/pkg/planner/core/joinorder/join_order.go index 8434a702df874..fdf9a6adeb001 100644 --- a/pkg/planner/core/joinorder/join_order.go +++ b/pkg/planner/core/joinorder/join_order.go @@ -586,7 +586,10 @@ func (j *joinOrderGreedy) optimize() (base.LogicalPlan, error) { var cartesianFactor float64 = j.ctx.GetSessionVars().CartesianJoinOrderThreshold var disableCartesian = cartesianFactor <= 0 allowNoEQ := !disableCartesian && j.group.allInnerJoin - if nodes, err = greedyConnectJoinNodes(detector, nodes, j.group.vertexHints, cartesianFactor, allowNoEQ); err != nil { + // LEADING should lock the hinted component's first seed choice, but it + // should not disable seed-by-cost for later disconnected components. + seedByCost := j.ctx.GetSessionVars().TiDBOptGreedyJoinSeedByCost + if nodes, err = greedyConnectJoinNodes(detector, nodes, j.group.vertexHints, cartesianFactor, allowNoEQ, seedByCost, nodeWithHint); err != nil { return nil, err } @@ -602,7 +605,7 @@ func (j *joinOrderGreedy) optimize() (base.LogicalPlan, error) { if cartesianFactor <= 0 { cartesianFactor = 1 } - if nodes, err = greedyConnectJoinNodes(detector, nodes, j.group.vertexHints, cartesianFactor, true); err != nil { + if nodes, err = greedyConnectJoinNodes(detector, nodes, j.group.vertexHints, cartesianFactor, true, seedByCost, nodeWithHint); err != nil { return nil, err } if len(nodes) != befLen { @@ -638,13 +641,26 @@ func (j *joinOrderGreedy) optimize() (base.LogicalPlan, error) { return root.p, nil } -func greedyConnectJoinNodes(detector *ConflictDetector, nodes []*Node, vertexHints map[int]*JoinMethodHint, cartesianFactor float64, allowNoEQ bool) ([]*Node, error) { +func greedyConnectJoinNodes(detector *ConflictDetector, nodes []*Node, vertexHints map[int]*JoinMethodHint, cartesianFactor float64, allowNoEQ bool, startByCost bool, lockedSeedNode *Node) ([]*Node, error) { // Outer loop: keep trying while we have multiple nodes and made progress in the last iteration. // This handles cases where conflict rules block some joins until other joins are completed. for len(nodes) > 1 { madeProgress := false var curJoinIdx int for curJoinIdx < len(nodes)-1 { + if startByCost && nodes[curJoinIdx].bitSet.Len() == 1 && nodes[curJoinIdx] != lockedSeedNode { + // Only reseed while this position still holds a single untouched leaf. + // Once it becomes a merged node, the first greedy choice is already fixed. + var seeded bool + var err error + nodes, seeded, err = seedGreedyJoinComponentByCost(detector, nodes, curJoinIdx, vertexHints, cartesianFactor, allowNoEQ) + if err != nil { + return nil, err + } + if seeded { + madeProgress = true + } + } var bestNode *Node var bestIdx int curJoinTree := nodes[curJoinIdx] @@ -694,6 +710,114 @@ func greedyConnectJoinNodes(detector *ConflictDetector, nodes []*Node, vertexHin return nodes, nil } +// seedGreedyJoinComponentByCost replaces the first untouched leaf at `start` +// with the cheapest valid leaf-leaf join in the current connected component. +// This changes only the initial seed; later expansion still follows the normal +// greedy "attach the best next node" logic. +func seedGreedyJoinComponentByCost( + detector *ConflictDetector, + nodes []*Node, + start int, + vertexHints map[int]*JoinMethodHint, + cartesianFactor float64, + allowNoEQ bool, +) ([]*Node, bool, error) { + if start >= len(nodes)-1 || nodes[start].bitSet.Len() != 1 { + return nodes, false, nil + } + + componentIdxs, err := collectGreedySeedComponentIndices(detector, nodes, start) + if err != nil { + return nil, false, err + } + if len(componentIdxs) < 2 { + return nodes, false, nil + } + + var ( + bestNode *Node + bestLeftIdx int + bestRightIdx int + ) + for leftPos := range len(componentIdxs) - 1 { + leftIdx := componentIdxs[leftPos] + for rightPos := leftPos + 1; rightPos < len(componentIdxs); rightPos++ { + rightIdx := componentIdxs[rightPos] + checkResult, newNode, err := checkConnectionAndMakeJoin(detector, nodes[leftIdx], nodes[rightIdx], vertexHints, allowNoEQ) + if err != nil { + return nil, false, err + } + if newNode == nil { + continue + } + if checkResult.NoEQEdge() { + if !allowNoEQ { + continue + } + // Reseeding should stay within the current connected component, but + // it should not start from a pure cartesian pair when a real join + // predicate is available elsewhere in that component. + if !checkResult.HasJoinCondition() { + continue + } + newNode.cumCost, err = applyCartesianFactor(newNode.cumCost, cartesianFactor) + if err != nil { + return nil, false, err + } + } + // The cartesian/no-EQ penalty has already been folded into cumCost above, + // so compare the penalized cost directly and avoid double counting it. + if bestNode == nil || newNode.cumCost < bestNode.cumCost { + bestNode = newNode + bestLeftIdx = leftIdx + bestRightIdx = rightIdx + } + } + } + if bestNode == nil { + return nodes, false, nil + } + + // Keep earlier finished components intact and rewrite only the current + // connected component by collapsing the chosen seed pair into one node. + newNodes := make([]*Node, 0, len(nodes)-1) + newNodes = append(newNodes, nodes[:start]...) + newNodes = append(newNodes, bestNode) + for idx := start; idx < len(nodes); idx++ { + if idx == bestLeftIdx || idx == bestRightIdx { + continue + } + newNodes = append(newNodes, nodes[idx]) + } + return newNodes, true, nil +} + +func collectGreedySeedComponentIndices(detector *ConflictDetector, nodes []*Node, start int) ([]int, error) { + // Expand only inside the connected component that starts at `start`. Pure + // cartesian edges are excluded here so seed-by-cost can only reseed within + // the current join group instead of pulling in a later disconnected group. + componentIdxs := []int{start} + seen := make([]bool, len(nodes)) + seen[start] = true + for head := 0; head < len(componentIdxs); head++ { + leftIdx := componentIdxs[head] + for rightIdx := start; rightIdx < len(nodes); rightIdx++ { + if seen[rightIdx] || nodes[rightIdx].bitSet.Len() != 1 { + continue + } + checkResult, err := detector.CheckConnection(nodes[leftIdx], nodes[rightIdx]) + if err != nil { + return nil, err + } + if checkResult.Connected() && checkResult.HasJoinCondition() { + seen[rightIdx] = true + componentIdxs = append(componentIdxs, rightIdx) + } + } + } + return componentIdxs, nil +} + func collectUsedEdges(nodes []*Node) map[uint64]struct{} { usedEdges := make(map[uint64]struct{}) for _, node := range nodes { diff --git a/pkg/planner/core/joinorder/join_order_side_effect_test.go b/pkg/planner/core/joinorder/join_order_side_effect_test.go new file mode 100644 index 0000000000000..1ea7092070b41 --- /dev/null +++ b/pkg/planner/core/joinorder/join_order_side_effect_test.go @@ -0,0 +1,179 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package joinorder + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/domain" + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/planner/core/base" + "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" + "github.com/pingcap/tidb/pkg/planner/property" + "github.com/pingcap/tidb/pkg/planner/util/coretestsdk" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/intset" + "github.com/stretchr/testify/require" +) + +func newTypedLeaf(ctx base.PlanContext, name string, tp byte, count float64) *logicalop.LogicalTableDual { + dual := logicalop.LogicalTableDual{RowCount: 1}.Init(ctx, 0) + dual.SetSchema(expression.NewSchema()) + dual.Schema().Append(&expression.Column{ + UniqueID: ctx.GetSessionVars().PlanColumnID.Add(1), + RetType: types.NewFieldType(tp), + }) + dual.SetStats(&property.StatsInfo{RowCount: count}) + return dual +} + +func newLeafProjection(ctx base.PlanContext, child base.LogicalPlan) *logicalop.LogicalProjection { + proj := logicalop.LogicalProjection{ + Exprs: expression.Column2Exprs(child.Schema().Columns), + }.Init(ctx, 0) + proj.SetSchema(child.Schema().Clone()) + proj.SetChildren(child) + proj.SetStats(&property.StatsInfo{RowCount: child.StatsInfo().RowCount}) + return proj +} + +func singletonBitSet(v int) intset.FastIntSet { + var s intset.FastIntSet + s.Insert(v) + return s +} + +func TestSpeculativeJoinBuildDoesNotMutateLeafPlans(t *testing.T) { + ctx := coretestsdk.MockContext() + defer domain.GetDomain(ctx).StatsHandle().Close() + + stringLeaf := newLeafProjection(ctx, newTypedLeaf(ctx, "s", mysql.TypeVarchar, 1)) + intLeaf := newTypedLeaf(ctx, "i", mysql.TypeLonglong, 1) + + castStringToInt := expression.BuildCastFunction(ctx.GetExprCtx(), stringLeaf.Schema().Columns[0], types.NewFieldType(mysql.TypeLonglong)) + eqCond, ok := expression.NewFunctionInternal(ctx.GetExprCtx(), ast.EQ, types.NewFieldType(mysql.TypeTiny), + castStringToInt, intLeaf.Schema().Columns[0]).(*expression.ScalarFunction) + require.True(t, ok) + + leftNode := &Node{ + bitSet: singletonBitSet(0), + p: intLeaf, + cumCost: 1, + usedEdges: make(map[uint64]struct{}), + } + rightNode := &Node{ + bitSet: singletonBitSet(1), + p: stringLeaf, + cumCost: 1, + usedEdges: make(map[uint64]struct{}), + } + all := leftNode.bitSet.Union(rightNode.bitSet) + detector := &ConflictDetector{ + ctx: ctx, + innerEdges: []*edge{{ + idx: 1, + joinType: base.InnerJoin, + eqConds: []*expression.ScalarFunction{eqCond}, + tes: all, + skipRules: true, + }}, + } + + origExprCnt := len(stringLeaf.Exprs) + origSchemaLen := stringLeaf.Schema().Len() + _, newNode, err := checkConnectionAndMakeJoin(detector, leftNode, rightNode, nil, false) + require.NoError(t, err) + require.NotNil(t, newNode) + join, ok := newNode.p.(*logicalop.LogicalJoin) + require.True(t, ok) + clonedRight, ok := join.Children()[1].(*logicalop.LogicalProjection) + require.True(t, ok) + require.NotSame(t, stringLeaf, clonedRight) + require.Same(t, clonedRight, clonedRight.Self()) + require.Len(t, clonedRight.Exprs, origExprCnt+1) + require.Equal(t, origSchemaLen+1, clonedRight.Schema().Len()) + require.NotNil(t, clonedRight.StatsInfo()) + _, ok = clonedRight.StatsInfo().ColNDVs[clonedRight.Schema().Columns[origSchemaLen].UniqueID] + require.True(t, ok) + require.Same(t, stringLeaf, rightNode.p) + require.Len(t, stringLeaf.Exprs, origExprCnt) + require.Equal(t, origSchemaLen, stringLeaf.Schema().Len()) +} + +func TestDegeneratePredicateDoesNotCountAsGreedySeedJoinCondition(t *testing.T) { + ctx := coretestsdk.MockContext() + defer domain.GetDomain(ctx).StatsHandle().Close() + + leftLeaf := newTypedLeaf(ctx, "a", mysql.TypeLonglong, 1) + midLeaf := newTypedLeaf(ctx, "b", mysql.TypeLonglong, 1) + rightLeaf := newTypedLeaf(ctx, "c", mysql.TypeLonglong, 1) + + leftNode := &Node{ + bitSet: singletonBitSet(0), + p: leftLeaf, + cumCost: 1, + usedEdges: make(map[uint64]struct{}), + } + midNode := &Node{ + bitSet: singletonBitSet(1), + p: midLeaf, + cumCost: 1, + usedEdges: make(map[uint64]struct{}), + } + rightNode := &Node{ + bitSet: singletonBitSet(2), + p: rightLeaf, + cumCost: 1, + usedEdges: make(map[uint64]struct{}), + } + + degenerateCond, ok := expression.NewFunctionInternal(ctx.GetExprCtx(), ast.GT, types.NewFieldType(mysql.TypeTiny), + leftLeaf.Schema().Columns[0], expression.NewZero()).(*expression.ScalarFunction) + require.True(t, ok) + crossCond, ok := expression.NewFunctionInternal(ctx.GetExprCtx(), ast.GT, types.NewFieldType(mysql.TypeTiny), + midLeaf.Schema().Columns[0], rightLeaf.Schema().Columns[0]).(*expression.ScalarFunction) + require.True(t, ok) + + detector := &ConflictDetector{ + ctx: ctx, + allInnerJoin: true, + } + degenerateEdge := detector.makeEdge(base.InnerJoin, []expression.Expression{degenerateCond}, leftNode.bitSet, midNode.bitSet, nil, nil) + degenerateEdge.nonEQConds = append(degenerateEdge.nonEQConds, degenerateCond) + crossEdge := detector.makeEdge(base.InnerJoin, []expression.Expression{crossCond}, midNode.bitSet, rightNode.bitSet, nil, nil) + crossEdge.nonEQConds = append(crossEdge.nonEQConds, crossCond) + + checkResult, err := detector.CheckConnection(leftNode, midNode) + require.NoError(t, err) + require.True(t, checkResult.Connected()) + require.True(t, checkResult.NoEQEdge()) + require.False(t, checkResult.HasJoinCondition()) + + checkResult, err = detector.CheckConnection(midNode, rightNode) + require.NoError(t, err) + require.True(t, checkResult.Connected()) + require.True(t, checkResult.NoEQEdge()) + require.True(t, checkResult.HasJoinCondition()) + + componentIdxs, err := collectGreedySeedComponentIndices(detector, []*Node{leftNode, midNode, rightNode}, 0) + require.NoError(t, err) + require.Equal(t, []int{0}, componentIdxs) + + componentIdxs, err = collectGreedySeedComponentIndices(detector, []*Node{leftNode, midNode, rightNode}, 1) + require.NoError(t, err) + require.Equal(t, []int{1, 2}, componentIdxs) +} diff --git a/pkg/planner/core/operator/logicalop/logical_projection.go b/pkg/planner/core/operator/logicalop/logical_projection.go index 8ca9a7ced1e82..d8213fd04c5b1 100644 --- a/pkg/planner/core/operator/logicalop/logical_projection.go +++ b/pkg/planner/core/operator/logicalop/logical_projection.go @@ -688,3 +688,22 @@ func InjectExpr(p base.LogicalPlan, expr expression.Expression) (base.LogicalPla } return proj, proj.AppendExpr(expr) } + +// InjectExprAvoidingMutation injects expr without mutating an existing projection in place. +// This is used by speculative join-order probing, where helper projections may be +// needed temporarily for key alignment but must not leak back into the original leaf. +func InjectExprAvoidingMutation(p base.LogicalPlan, expr expression.Expression) (base.LogicalPlan, *expression.Column) { + proj, ok := p.(*LogicalProjection) + if !ok { + return InjectExpr(p, expr) + } + proj = proj.LogicalProjectionShallowRef() + // ShallowRef keeps BaseLogicalPlan.self and the stats cache from the original + // projection. Fix both so RecursiveDeriveStats() and later costing operate on + // the speculative clone with its appended helper column. + proj.SetSelf(proj) + proj.ExprsShallowRef() + proj.SetSchema(p.Schema().Clone()) + proj.SetStats(nil) + return proj, proj.AppendExpr(expr) +} diff --git a/pkg/planner/core/rule_join_reorder.go b/pkg/planner/core/rule_join_reorder.go index 561da8a0aad16..48c3e3158f650 100644 --- a/pkg/planner/core/rule_join_reorder.go +++ b/pkg/planner/core/rule_join_reorder.go @@ -283,6 +283,29 @@ type JoinReOrderSolver struct { type jrNode struct { p base.LogicalPlan cumCost float64 + // leafCnt tracks how many original join-group leaf nodes are represented by + // this subtree. The cost-based seed heuristic only compares untouched leaves. + leafCnt int +} + +type legacyJoinProbeResult struct { + leftPlan base.LogicalPlan + rightPlan base.LogicalPlan + joinType *joinTypeWithExtMsg + eqEdges []*expression.ScalarFunction + hasOtherJoinCondition bool +} + +func (r *legacyJoinProbeResult) HasEQEdge() bool { + return r != nil && len(r.eqEdges) > 0 +} + +func (r *legacyJoinProbeResult) HasJoinCondition() bool { + return r != nil && (r.HasEQEdge() || r.hasOtherJoinCondition) +} + +func (r *legacyJoinProbeResult) IsCartesian() bool { + return r != nil && !r.HasJoinCondition() } type joinTypeWithExtMsg struct { @@ -467,11 +490,7 @@ func (s *baseSingleGroupJoinOrderSolver) generateNestedLeadingJoinGroup( }) } joiner := func(left, right base.LogicalPlan) (base.LogicalPlan, bool, error) { - currentJoin, ok := s.connectJoinNodes(left, right, hasOuterJoin) - if !ok { - return nil, false, nil - } - return currentJoin, true, nil + return s.connectJoinNodes(left, right, hasOuterJoin) } warn := func() { s.ctx.GetSessionVars().StmtCtx.SetHintWarning("leading hint contains unexpected element type") @@ -494,20 +513,20 @@ func (s *baseSingleGroupJoinOrderSolver) generateNestedLeadingJoinGroup( func (s *baseSingleGroupJoinOrderSolver) connectJoinNodes( currentJoin, nextNode base.LogicalPlan, hasOuterJoin bool, -) (base.LogicalPlan, bool) { - lNode, rNode, usedEdges, joinType := s.checkConnection(currentJoin, nextNode) - if hasOuterJoin && len(usedEdges) == 0 { +) (base.LogicalPlan, bool, error) { + probe := s.probeConnection(currentJoin, nextNode) + if hasOuterJoin && !probe.HasJoinCondition() { // If the joinGroups contain an outer join, we disable cartesian product. // For non-equality conditions, only allow them when they do not reference // null-extended columns from any outer join in the current group. - if !s.hasOtherJoinCondition(lNode, rNode) { - return nil, false - } + return nil, false, nil + } + currentJoin, rem, err := s.buildJoinFromProbe(probe) + if err != nil { + return nil, false, err } - var rem []expression.Expression - currentJoin, rem = s.makeJoin(lNode, rNode, usedEdges, joinType) s.otherConds = rem - return currentJoin, true + return currentJoin, true, nil } // generateJoinOrderNode used to derive the stats for the joinNodePlans and generate the jrNode groups based on the cost. @@ -522,6 +541,7 @@ func (s *baseSingleGroupJoinOrderSolver) generateJoinOrderNode(joinNodePlans []b joinGroup = append(joinGroup, &jrNode{ p: node, cumCost: cost, + leafCnt: 1, }) } return joinGroup, nil @@ -536,44 +556,31 @@ func (s *baseSingleGroupJoinOrderSolver) baseNodeCumCost(groupNode base.LogicalP return cost } -// checkConnection used to check whether two nodes have equal conditions or not. -func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan base.LogicalPlan) (leftNode, rightNode base.LogicalPlan, usedEdges []*expression.ScalarFunction, joinType *joinTypeWithExtMsg) { - joinType = &joinTypeWithExtMsg{JoinType: base.InnerJoin} - leftNode, rightNode = leftPlan, rightPlan +// probeConnection inspects whether two subplans are connected by eq edges or +// real cross-side non-equality predicates. It does not build joins or inject +// helper expressions into plan trees. +func (s *baseSingleGroupJoinOrderSolver) probeConnection(leftPlan, rightPlan base.LogicalPlan) *legacyJoinProbeResult { + result := &legacyJoinProbeResult{ + leftPlan: leftPlan, + rightPlan: rightPlan, + joinType: &joinTypeWithExtMsg{JoinType: base.InnerJoin}, + } for idx, edge := range s.eqEdges { lCol, rCol := expression.ExtractColumnsFromColOpCol(edge) if leftPlan.Schema().Contains(lCol) && rightPlan.Schema().Contains(rCol) { - joinType = s.joinTypes[idx] - usedEdges = append(usedEdges, edge) + result.joinType = s.joinTypes[idx] + result.eqEdges = append(result.eqEdges, edge) } else if rightPlan.Schema().Contains(lCol) && leftPlan.Schema().Contains(rCol) { - joinType = s.joinTypes[idx] - if joinType.JoinType != base.InnerJoin { - rightNode, leftNode = leftPlan, rightPlan - usedEdges = append(usedEdges, edge) - } else { - funcName := edge.FuncName.L - newSf := expression.NewFunctionInternal(s.ctx.GetExprCtx(), funcName, edge.GetStaticType(), rCol, lCol).(*expression.ScalarFunction) - - // after creating the new EQCondition function, the 2 args might not be column anymore, for example `sf=sf(cast(col))`, - // which breaks the assumption that join eq keys must be `col=col`, to handle this, inject 2 projections. - _, isCol0 := newSf.GetArgs()[0].(*expression.Column) - _, isCol1 := newSf.GetArgs()[1].(*expression.Column) - if !isCol0 || !isCol1 { - if !isCol0 { - leftPlan, rCol = logicalop.InjectExpr(leftPlan, newSf.GetArgs()[0]) - } - if !isCol1 { - rightPlan, lCol = logicalop.InjectExpr(rightPlan, newSf.GetArgs()[1]) - } - leftNode, rightNode = leftPlan, rightPlan - newSf = expression.NewFunctionInternal(s.ctx.GetExprCtx(), funcName, edge.GetStaticType(), - rCol, lCol).(*expression.ScalarFunction) - } - usedEdges = append(usedEdges, newSf) + result.joinType = s.joinTypes[idx] + if result.joinType.JoinType != base.InnerJoin { + result.leftPlan = rightPlan + result.rightPlan = leftPlan } + result.eqEdges = append(result.eqEdges, edge) } } - return + result.hasOtherJoinCondition = s.hasOtherJoinCondition(result.leftPlan, result.rightPlan) + return result } // hasOtherJoinCondition checks whether there are non-equality join conditions @@ -584,12 +591,8 @@ func (s *baseSingleGroupJoinOrderSolver) hasOtherJoinCondition(leftPlan, rightPl if len(s.otherConds) == 0 { return false } - mergedSchema := expression.MergeSchema(leftPlan.Schema(), rightPlan.Schema()) for _, cond := range s.otherConds { - if !expression.ExprFromSchema(cond, mergedSchema) { - continue - } - if expression.ExprFromSchema(cond, leftPlan.Schema()) || expression.ExprFromSchema(cond, rightPlan.Schema()) { + if !joinorder.ExprConnectsBothSides(cond, leftPlan.Schema(), rightPlan.Schema()) { continue } if s.nullExtendedCols != nil && expression.ExprReferenceSchema(cond, s.nullExtendedCols) { @@ -600,6 +603,19 @@ func (s *baseSingleGroupJoinOrderSolver) hasOtherJoinCondition(leftPlan, rightPl return false } +func (s *baseSingleGroupJoinOrderSolver) buildJoinFromProbe(probe *legacyJoinProbeResult) (base.LogicalPlan, []expression.Expression, error) { + leftPlan := probe.leftPlan + rightPlan := probe.rightPlan + eqEdges := probe.eqEdges + var err error + leftPlan, rightPlan, eqEdges, err = joinorder.AlignEQCondsWithoutMutation(s.ctx, leftPlan, rightPlan, eqEdges) + if err != nil { + return nil, nil, err + } + join, remainOtherConds := s.makeJoin(leftPlan, rightPlan, eqEdges, probe.joinType) + return join, remainOtherConds, nil +} + // makeJoin build join tree for the nodes which have equal conditions to connect them. func (s *baseSingleGroupJoinOrderSolver) makeJoin(leftPlan, rightPlan base.LogicalPlan, eqEdges []*expression.ScalarFunction, joinType *joinTypeWithExtMsg) (base.LogicalPlan, []expression.Expression) { remainOtherConds := make([]expression.Expression, len(s.otherConds)) diff --git a/pkg/planner/core/rule_join_reorder_greedy.go b/pkg/planner/core/rule_join_reorder_greedy.go index b9f6b0366451c..758565fe18a65 100644 --- a/pkg/planner/core/rule_join_reorder_greedy.go +++ b/pkg/planner/core/rule_join_reorder_greedy.go @@ -96,16 +96,30 @@ func (s *joinReorderGreedySolver) solve(joinNodePlans []base.LogicalPlan) (base. } func (s *joinReorderGreedySolver) constructConnectedJoinTree() (*jrNode, error) { - curJoinTree := s.curJoinGroup[0] - s.curJoinGroup = s.curJoinGroup[1:] cartesianThreshold := s.ctx.GetSessionVars().CartesianJoinOrderThreshold + // Mirror the advanced joinorder path: if enabled, pick the initial greedy + // seed by cost before falling back to the historical smallest-node start. + curJoinTree, seeded, err := s.seedConnectedJoinTreeByCost(cartesianThreshold) + if err != nil { + return nil, err + } + if !seeded { + curJoinTree = s.curJoinGroup[0] + s.curJoinGroup = s.curJoinGroup[1:] + } for { bestCost := math.MaxFloat64 bestIdx, whateverValidOneIdx, bestIsCartesian := -1, -1, false var finalRemainOthers, remainOthersOfWhateverValidOne []expression.Expression var bestJoin, whateverValidOne base.LogicalPlan + var newJoin base.LogicalPlan + var remainOthers []expression.Expression + var isCartesian bool for i, node := range s.curJoinGroup { - newJoin, remainOthers, isCartesian := s.checkConnectionAndMakeJoin(curJoinTree.p, node.p) + newJoin, remainOthers, isCartesian, err = s.checkConnectionAndMakeJoin(curJoinTree.p, node.p) + if err != nil { + return nil, err + } if isCartesian { s.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptCartesianJoinOrderThreshold) } @@ -125,16 +139,7 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree() (*jrNode, error) // Cartesian join is risky but skipping it brutally may lead to bad join orders, see #63290. // To trade-off, we use a ratio as penalty to control the preference. // Only select a cartesian join when cost(cartesian)*ratio < cost(non-cartesian). - curIsBetter := false - if !bestIsCartesian && isCartesian { - curIsBetter = curCost*cartesianThreshold < bestCost - } else if bestIsCartesian && !isCartesian { - curIsBetter = curCost < bestCost*cartesianThreshold - } else { - curIsBetter = curCost < bestCost - } - - if curIsBetter { + if betterLegacyGreedyCandidate(curCost, isCartesian, bestCost, bestIsCartesian, cartesianThreshold) { bestCost = curCost bestJoin = newJoin bestIdx = i @@ -158,6 +163,7 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree() (*jrNode, error) curJoinTree = &jrNode{ p: bestJoin, cumCost: bestCost, + leafCnt: curJoinTree.leafCnt + s.curJoinGroup[bestIdx].leafCnt, } s.curJoinGroup = slices.Delete(s.curJoinGroup, bestIdx, bestIdx+1) s.otherConds = finalRemainOthers @@ -165,9 +171,130 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree() (*jrNode, error) return curJoinTree, nil } -func (s *joinReorderGreedySolver) checkConnectionAndMakeJoin(leftPlan, rightPlan base.LogicalPlan) (base.LogicalPlan, []expression.Expression, bool) { - leftPlan, rightPlan, usedEdges, joinType := s.checkConnection(leftPlan, rightPlan) - isCartesian := len(usedEdges) == 0 && !s.hasOtherJoinCondition(leftPlan, rightPlan) +// seedConnectedJoinTreeByCost is the legacy-path counterpart of +// seedGreedyJoinComponentByCost in pkg/planner/core/joinorder. It only changes +// the first pair selection and keeps the rest of the legacy greedy expansion. +func (s *joinReorderGreedySolver) seedConnectedJoinTreeByCost(cartesianThreshold float64) (*jrNode, bool, error) { + if !s.ctx.GetSessionVars().TiDBOptGreedyJoinSeedByCost || len(s.curJoinGroup) < 2 { + return nil, false, nil + } + // LEADING only fixes the component that currently starts with the hinted + // subtree. Later disconnected components should still be allowed to reseed. + if s.leadingJoinGroup != nil && s.curJoinGroup[0].p == s.leadingJoinGroup { + return nil, false, nil + } + + componentIdxs := s.collectConnectedSeedComponentIndices() + if len(componentIdxs) < 2 { + return nil, false, nil + } + + bestCost := math.MaxFloat64 + bestLeftIdx, bestRightIdx, bestLeafCnt := -1, -1, 0 + bestIsCartesian := false + var bestJoin base.LogicalPlan + var bestRemainOthers []expression.Expression + var newJoin base.LogicalPlan + var remainOthers []expression.Expression + var isCartesian bool + var err error + + for leftPos := range len(componentIdxs) - 1 { + leftIdx := componentIdxs[leftPos] + for rightPos := leftPos + 1; rightPos < len(componentIdxs); rightPos++ { + rightIdx := componentIdxs[rightPos] + newJoin, remainOthers, isCartesian, err = s.checkConnectionAndMakeJoin(s.curJoinGroup[leftIdx].p, s.curJoinGroup[rightIdx].p) + if err != nil { + return nil, false, err + } + if isCartesian { + s.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptCartesianJoinOrderThreshold) + } + // Keep the historical greedy attach loop's cartesian behavior, but do + // not let the reseed heuristic start a connected component from a + // pure cartesian pair. + if newJoin == nil || isCartesian { + continue + } + _, _, err := newJoin.RecursiveDeriveStats(nil) + if err != nil { + return nil, false, err + } + curCost := s.calcJoinCumCost(newJoin, s.curJoinGroup[leftIdx], s.curJoinGroup[rightIdx]) + if betterLegacyGreedyCandidate(curCost, isCartesian, bestCost, bestIsCartesian, cartesianThreshold) { + bestCost = curCost + bestJoin = newJoin + bestLeftIdx = leftIdx + bestRightIdx = rightIdx + bestLeafCnt = s.curJoinGroup[leftIdx].leafCnt + s.curJoinGroup[rightIdx].leafCnt + bestRemainOthers = remainOthers + bestIsCartesian = isCartesian + } + } + } + if bestJoin == nil { + return nil, false, nil + } + + // Collapse the chosen leaf-leaf seed pair into one subtree and leave the + // remaining nodes untouched for the normal greedy attach loop. + remaining := make([]*jrNode, 0, len(s.curJoinGroup)-2) + for idx, node := range s.curJoinGroup { + if idx == bestLeftIdx || idx == bestRightIdx { + continue + } + remaining = append(remaining, node) + } + s.curJoinGroup = remaining + s.otherConds = bestRemainOthers + return &jrNode{ + p: bestJoin, + cumCost: bestCost, + leafCnt: bestLeafCnt, + }, true, nil +} + +func (s *joinReorderGreedySolver) collectConnectedSeedComponentIndices() []int { + // The legacy path also reseeds only inside the component currently being + // expanded. Treating a pure cartesian edge as connectivity here would let a + // cheaper pair from a later disconnected component jump ahead. + componentIdxs := []int{0} + seen := make([]bool, len(s.curJoinGroup)) + seen[0] = true + for head := 0; head < len(componentIdxs); head++ { + leftIdx := componentIdxs[head] + for rightIdx := range len(s.curJoinGroup) { + if seen[rightIdx] || s.curJoinGroup[rightIdx].leafCnt != 1 { + continue + } + if s.hasNonCartesianSeedConnection(s.curJoinGroup[leftIdx].p, s.curJoinGroup[rightIdx].p) { + seen[rightIdx] = true + componentIdxs = append(componentIdxs, rightIdx) + } + } + } + return componentIdxs +} + +func (s *joinReorderGreedySolver) hasNonCartesianSeedConnection(leftPlan, rightPlan base.LogicalPlan) bool { + return s.probeConnection(leftPlan, rightPlan).HasJoinCondition() +} + +// betterLegacyGreedyCandidate preserves the existing cartesian penalty policy +// so the new seed heuristic compares candidates the same way as the old loop. +func betterLegacyGreedyCandidate(curCost float64, curIsCartesian bool, bestCost float64, bestIsCartesian bool, cartesianThreshold float64) bool { + if !bestIsCartesian && curIsCartesian { + return curCost*cartesianThreshold < bestCost + } + if bestIsCartesian && !curIsCartesian { + return curCost < bestCost*cartesianThreshold + } + return curCost < bestCost +} + +func (s *joinReorderGreedySolver) checkConnectionAndMakeJoin(leftPlan, rightPlan base.LogicalPlan) (base.LogicalPlan, []expression.Expression, bool, error) { + probe := s.probeConnection(leftPlan, rightPlan) + isCartesian := probe.IsCartesian() if isCartesian && // cartesian join (!s.allInnerJoin || // not all joins are inner joins s.ctx.GetSessionVars().CartesianJoinOrderThreshold <= 0) { // cartesian join is disabled @@ -175,8 +302,11 @@ func (s *joinReorderGreedySolver) checkConnectionAndMakeJoin(leftPlan, rightPlan // t2 and t3, and cartesian join between t2 and t3 might lead to incorrect results. // For safety we don't allow cartesian outer join here. // For inner joins like `t1 join t2 join t3`, we can reorder them freely, so we allow cartesian join here. - return nil, nil, false + return nil, nil, false, nil + } + join, otherConds, err := s.buildJoinFromProbe(probe) + if err != nil { + return nil, nil, false, err } - join, otherConds := s.makeJoin(leftPlan, rightPlan, usedEdges, joinType) - return join, otherConds, isCartesian + return join, otherConds, isCartesian, nil } diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index 09fd875389e73..87f04ee9fbd6a 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -668,6 +668,12 @@ const ( // TiDBOptEnableAdvancedJoinReorder controls whether to use the advanced join reorder framework. TiDBOptEnableAdvancedJoinReorder = "tidb_opt_enable_advanced_join_reorder" + // TiDBOptGreedyJoinSeedByCost controls whether greedy join reorder + // picks the first join in a connected component by the cheapest join cost + // instead of the smallest base-node cumulative cost. It is kept opt-in so + // existing workloads preserve their historical plan shapes by default. + TiDBOptGreedyJoinSeedByCost = "tidb_opt_greedy_join_seed_by_cost" + // TiDBOptJoinReorderThroughSel enables pushing selection conditions down to // reordered join trees when applicable. TiDBOptJoinReorderThroughSel = "tidb_opt_join_reorder_through_sel" @@ -1545,6 +1551,7 @@ const ( DefEnableVectorizedExpression = true DefTiDBOptJoinReorderThreshold = 0 DefTiDBOptEnableAdvancedJoinReorder = true + DefTiDBOptGreedyJoinSeedByCost = false DefTiDBOptJoinReorderThroughSel = false DefTiDBDDLSlowOprThreshold = 300 DefTiDBUseFastAnalyze = false diff --git a/pkg/sessionctx/variable/session.go b/pkg/sessionctx/variable/session.go index 7311b16d722de..7838468af6b36 100644 --- a/pkg/sessionctx/variable/session.go +++ b/pkg/sessionctx/variable/session.go @@ -1264,6 +1264,10 @@ type SessionVars struct { // TiDBOptEnableAdvancedJoinReorder controls whether to use the advanced join reorder framework. TiDBOptEnableAdvancedJoinReorder bool + // TiDBOptGreedyJoinSeedByCost controls whether greedy join reorder + // picks the first join by join cost instead of the smallest base node. + TiDBOptGreedyJoinSeedByCost bool + // TiDBOptJoinReorderThroughSel enables pushing selection conditions down to // reordered join trees when applicable. TiDBOptJoinReorderThroughSel bool diff --git a/pkg/sessionctx/variable/setvar_affect.go b/pkg/sessionctx/variable/setvar_affect.go index dbf9e58ce2e4f..7a63e013d06b2 100644 --- a/pkg/sessionctx/variable/setvar_affect.go +++ b/pkg/sessionctx/variable/setvar_affect.go @@ -99,6 +99,7 @@ var isHintUpdatableVerified = map[string]struct{}{ "tidb_enable_vectorized_expression": {}, "tidb_opt_join_reorder_threshold": {}, "tidb_opt_enable_advanced_join_reorder": {}, + "tidb_opt_greedy_join_seed_by_cost": {}, "tidb_enable_index_merge": {}, "tidb_enable_no_backslash_escapes_in_like": {}, "tidb_enable_extended_stats": {}, diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index 24180942a00c7..76aedec241e8f 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -2612,6 +2612,10 @@ var defaultSysVars = []*SysVar{ s.TiDBOptEnableAdvancedJoinReorder = TiDBOptOn(val) return nil }}, + {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptGreedyJoinSeedByCost, Value: BoolToOnOff(vardef.DefTiDBOptGreedyJoinSeedByCost), Type: vardef.TypeBool, SetSession: func(s *SessionVars, val string) error { + s.TiDBOptGreedyJoinSeedByCost = TiDBOptOn(val) + return nil + }}, {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptJoinReorderThroughSel, Value: BoolToOnOff(vardef.DefTiDBOptJoinReorderThroughSel), Type: vardef.TypeBool, SetSession: func(s *SessionVars, val string) error { s.TiDBOptJoinReorderThroughSel = TiDBOptOn(val) return nil diff --git a/pkg/sessionctx/variable/varsutil_test.go b/pkg/sessionctx/variable/varsutil_test.go index 16616e04174b1..397d68ba0824b 100644 --- a/pkg/sessionctx/variable/varsutil_test.go +++ b/pkg/sessionctx/variable/varsutil_test.go @@ -278,6 +278,14 @@ func TestVarsutil(t *testing.T) { require.NoError(t, err) require.Equal(t, true, v.TiDBOptEnableAdvancedJoinReorder) + require.Equal(t, vardef.DefTiDBOptGreedyJoinSeedByCost, v.TiDBOptGreedyJoinSeedByCost) + err = v.SetSystemVar(vardef.TiDBOptGreedyJoinSeedByCost, "ON") + require.NoError(t, err) + require.Equal(t, true, v.TiDBOptGreedyJoinSeedByCost) + err = v.SetSystemVar(vardef.TiDBOptGreedyJoinSeedByCost, "OFF") + require.NoError(t, err) + require.Equal(t, false, v.TiDBOptGreedyJoinSeedByCost) + err = v.SetSystemVar(vardef.TiDBLowResolutionTSO, "1") require.NoError(t, err) val, err = v.GetSessionOrGlobalSystemVar(context.Background(), vardef.TiDBLowResolutionTSO) diff --git a/tests/integrationtest/r/planner/core/join_reorder2.result b/tests/integrationtest/r/planner/core/join_reorder2.result index fb94aca83af7d..83e9d5b3bd7f7 100644 --- a/tests/integrationtest/r/planner/core/join_reorder2.result +++ b/tests/integrationtest/r/planner/core/join_reorder2.result @@ -265,3 +265,331 @@ Projection root planner__core__join_reorder2.t1.id, planner__core__join_reorder │ └─TableFullScan cop[tikv] table:t5 keep order:true, stats:pseudo └─TableReader(Probe) root data:TableFullScan └─TableFullScan cop[tikv] table:t1 keep order:true, stats:pseudo +drop table if exists t_seed1, t_seed2, t_seed3; +create table t_seed1(id int not null primary key, col1 int not null, key idx_col1(col1)); +create table t_seed2(id int not null primary key, col2 int not null); +create table t_seed3(id int not null primary key, col1 int not null, key idx_col1(col1)); +insert into t_seed1 values +(1, 1), (2, 2), (3, 3), (4, 4), +(5, 5), (6, 6), (7, 7), (8, 8), +(9, 9), (10, 10), (11, 11), (12, 12), +(13, 13), (14, 14), (15, 15), (16, 16); +insert into t_seed2 values (1, 1); +insert into t_seed3 values (1, 1), (2, 2); +analyze table t_seed1, t_seed2, t_seed3; +set @@tidb_enable_outer_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select * from +(select t_seed1.id, t_seed1.col1 from t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2) tt +join t_seed3 on t_seed3.col1 = tt.col1; +id task access object operator info +HashJoin root inner join, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed3.col1)] +├─TableReader(Build) root data:TableFullScan +│ └─TableFullScan cop[tikv] table:t_seed3 keep order:false +└─HashJoin(Probe) root left outer join, left side:TableReader, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.col2)] + ├─TableReader(Build) root data:TableFullScan + │ └─TableFullScan cop[tikv] table:t_seed2 keep order:false + └─TableReader(Probe) root data:TableFullScan + └─TableFullScan cop[tikv] table:t_seed1 keep order:false +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select * from +(select t_seed1.id, t_seed1.col1 from t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2) tt +join t_seed3 on t_seed3.col1 = tt.col1; +id task access object operator info +HashJoin root left outer join, left side:MergeJoin, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.col2)] +├─TableReader(Build) root data:TableFullScan +│ └─TableFullScan cop[tikv] table:t_seed2 keep order:false +└─MergeJoin(Probe) root inner join, left key:planner__core__join_reorder2.t_seed1.col1, right key:planner__core__join_reorder2.t_seed3.col1 + ├─IndexReader(Build) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_seed3, index:idx_col1(col1) keep order:true + └─IndexReader(Probe) root index:IndexFullScan + └─IndexFullScan cop[tikv] table:t_seed1, index:idx_col1(col1) keep order:true +set @@tidb_opt_enable_advanced_join_reorder = 0; +explain format = 'plan_tree' select * from +(select t_seed1.id, t_seed1.col1 from t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2) tt +join t_seed3 on t_seed3.col1 = tt.col1; +id task access object operator info +HashJoin root left outer join, left side:MergeJoin, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.col2)] +├─TableReader(Build) root data:TableFullScan +│ └─TableFullScan cop[tikv] table:t_seed2 keep order:false +└─MergeJoin(Probe) root inner join, left key:planner__core__join_reorder2.t_seed1.col1, right key:planner__core__join_reorder2.t_seed3.col1 + ├─IndexReader(Build) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_seed3, index:idx_col1(col1) keep order:true + └─IndexReader(Probe) root index:IndexFullScan + └─IndexFullScan cop[tikv] table:t_seed1, index:idx_col1(col1) keep order:true +set @@tidb_opt_enable_advanced_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +drop table if exists t_lead1, t_lead2; +create table t_lead1(id int not null primary key, a int not null, key idx_a(a)); +create table t_lead2(id int not null primary key, a int not null, key idx_a(a)); +insert into t_lead1 values (1, 1), (2, 2); +insert into t_lead2 values (1, 1), (2, 2); +analyze table t_lead1, t_lead2; +set @@tidb_opt_join_reorder_threshold = 2; +set @@tidb_enable_outer_join_reorder = 1; +set @@tidb_opt_enable_advanced_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select /*+ leading(t_lead1, t_lead2) */ * from +(t_lead1 join t_lead2 on t_lead1.a = t_lead2.a), +(t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2 join t_seed3 on t_seed3.col1 = t_seed1.col1); +id task access object operator info +HashJoin root CARTESIAN inner join +├─MergeJoin(Build) root inner join, left key:planner__core__join_reorder2.t_lead1.a, right key:planner__core__join_reorder2.t_lead2.a +│ ├─IndexReader(Build) root index:IndexFullScan +│ │ └─IndexFullScan cop[tikv] table:t_lead2, index:idx_a(a) keep order:true +│ └─IndexReader(Probe) root index:IndexFullScan +│ └─IndexFullScan cop[tikv] table:t_lead1, index:idx_a(a) keep order:true +└─HashJoin(Probe) root inner join, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed3.col1)] + ├─TableReader(Build) root data:TableFullScan + │ └─TableFullScan cop[tikv] table:t_seed3 keep order:false + └─HashJoin(Probe) root left outer join, left side:TableReader, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.col2)] + ├─TableReader(Build) root data:TableFullScan + │ └─TableFullScan cop[tikv] table:t_seed2 keep order:false + └─TableReader(Probe) root data:TableFullScan + └─TableFullScan cop[tikv] table:t_seed1 keep order:false +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select /*+ leading(t_lead1, t_lead2) */ * from +(t_lead1 join t_lead2 on t_lead1.a = t_lead2.a), +(t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2 join t_seed3 on t_seed3.col1 = t_seed1.col1); +id task access object operator info +Projection root planner__core__join_reorder2.t_lead1.id, planner__core__join_reorder2.t_lead1.a, planner__core__join_reorder2.t_lead2.id, planner__core__join_reorder2.t_lead2.a, planner__core__join_reorder2.t_seed1.id, planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.id, planner__core__join_reorder2.t_seed2.col2, planner__core__join_reorder2.t_seed3.id, planner__core__join_reorder2.t_seed3.col1 +└─HashJoin root CARTESIAN inner join + ├─MergeJoin(Build) root inner join, left key:planner__core__join_reorder2.t_lead1.a, right key:planner__core__join_reorder2.t_lead2.a + │ ├─IndexReader(Build) root index:IndexFullScan + │ │ └─IndexFullScan cop[tikv] table:t_lead2, index:idx_a(a) keep order:true + │ └─IndexReader(Probe) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_lead1, index:idx_a(a) keep order:true + └─HashJoin(Probe) root left outer join, left side:MergeJoin, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.col2)] + ├─TableReader(Build) root data:TableFullScan + │ └─TableFullScan cop[tikv] table:t_seed2 keep order:false + └─MergeJoin(Probe) root inner join, left key:planner__core__join_reorder2.t_seed1.col1, right key:planner__core__join_reorder2.t_seed3.col1 + ├─IndexReader(Build) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_seed3, index:idx_col1(col1) keep order:true + └─IndexReader(Probe) root index:IndexFullScan + └─IndexFullScan cop[tikv] table:t_seed1, index:idx_col1(col1) keep order:true +set @@tidb_opt_enable_advanced_join_reorder = 0; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select /*+ leading(t_lead1, t_lead2) */ * from +(t_lead1 join t_lead2 on t_lead1.a = t_lead2.a), +(t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2 join t_seed3 on t_seed3.col1 = t_seed1.col1); +id task access object operator info +HashJoin root CARTESIAN inner join +├─MergeJoin(Build) root inner join, left key:planner__core__join_reorder2.t_lead1.a, right key:planner__core__join_reorder2.t_lead2.a +│ ├─IndexReader(Build) root index:IndexFullScan +│ │ └─IndexFullScan cop[tikv] table:t_lead2, index:idx_a(a) keep order:true +│ └─IndexReader(Probe) root index:IndexFullScan +│ └─IndexFullScan cop[tikv] table:t_lead1, index:idx_a(a) keep order:true +└─HashJoin(Probe) root inner join, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed3.col1)] + ├─TableReader(Build) root data:TableFullScan + │ └─TableFullScan cop[tikv] table:t_seed3 keep order:false + └─HashJoin(Probe) root left outer join, left side:TableReader, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.col2)] + ├─TableReader(Build) root data:TableFullScan + │ └─TableFullScan cop[tikv] table:t_seed2 keep order:false + └─TableReader(Probe) root data:TableFullScan + └─TableFullScan cop[tikv] table:t_seed1 keep order:false +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select /*+ leading(t_lead1, t_lead2) */ * from +(t_lead1 join t_lead2 on t_lead1.a = t_lead2.a), +(t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2 join t_seed3 on t_seed3.col1 = t_seed1.col1); +id task access object operator info +Projection root planner__core__join_reorder2.t_lead1.id, planner__core__join_reorder2.t_lead1.a, planner__core__join_reorder2.t_lead2.id, planner__core__join_reorder2.t_lead2.a, planner__core__join_reorder2.t_seed1.id, planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.id, planner__core__join_reorder2.t_seed2.col2, planner__core__join_reorder2.t_seed3.id, planner__core__join_reorder2.t_seed3.col1 +└─HashJoin root CARTESIAN inner join + ├─MergeJoin(Build) root inner join, left key:planner__core__join_reorder2.t_lead1.a, right key:planner__core__join_reorder2.t_lead2.a + │ ├─IndexReader(Build) root index:IndexFullScan + │ │ └─IndexFullScan cop[tikv] table:t_lead2, index:idx_a(a) keep order:true + │ └─IndexReader(Probe) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_lead1, index:idx_a(a) keep order:true + └─HashJoin(Probe) root left outer join, left side:MergeJoin, equal:[eq(planner__core__join_reorder2.t_seed1.col1, planner__core__join_reorder2.t_seed2.col2)] + ├─TableReader(Build) root data:TableFullScan + │ └─TableFullScan cop[tikv] table:t_seed2 keep order:false + └─MergeJoin(Probe) root inner join, left key:planner__core__join_reorder2.t_seed1.col1, right key:planner__core__join_reorder2.t_seed3.col1 + ├─IndexReader(Build) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_seed3, index:idx_col1(col1) keep order:true + └─IndexReader(Probe) root index:IndexFullScan + └─IndexFullScan cop[tikv] table:t_seed1, index:idx_col1(col1) keep order:true +set @@tidb_opt_enable_advanced_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +drop table if exists t_cc1, t_cc2, t_cc3, t_cc4; +create table t_cc1(id int not null primary key, a int not null); +create table t_cc2(id int not null primary key, a int not null, key idx_a(a)); +create table t_cc3(id int not null primary key, a int not null, key idx_a(a)); +create table t_cc4(id int not null primary key, a int not null, key idx_a(a)); +insert into t_cc1 values (1, 1); +insert into t_cc2 values (1, 1); +insert into t_cc2 select id + 1, a + 1 from t_cc2; +insert into t_cc2 select id + 2, a + 2 from t_cc2; +insert into t_cc2 select id + 4, a + 4 from t_cc2; +insert into t_cc2 select id + 8, a + 8 from t_cc2; +insert into t_cc2 select id + 16, a + 16 from t_cc2; +insert into t_cc2 select id + 32, a + 32 from t_cc2; +insert into t_cc2 select id + 64, a + 64 from t_cc2; +insert into t_cc2 select id + 128, a + 128 from t_cc2; +insert into t_cc2 select id + 256, a + 256 from t_cc2; +insert into t_cc2 select id + 512, a + 512 from t_cc2; +delete from t_cc2 where id > 1000; +insert into t_cc3 values +(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), +(6, 6), (7, 7), (8, 8), (9, 9), (10, 10); +insert into t_cc4 select * from t_cc3; +analyze table t_cc1, t_cc2, t_cc3, t_cc4; +set @@tidb_opt_join_reorder_threshold = 2; +set @@tidb_opt_cartesian_join_order_threshold = 0; +set @@tidb_opt_enable_advanced_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select * from +(t_cc1 join t_cc2 on t_cc1.a = t_cc2.a), +(t_cc3 join t_cc4 on t_cc3.a = t_cc4.a); +id task access object operator info +HashJoin root CARTESIAN inner join +├─IndexJoin(Build) root inner join, inner:IndexReader, outer key:planner__core__join_reorder2.t_cc1.a, inner key:planner__core__join_reorder2.t_cc2.a, equal cond:eq(planner__core__join_reorder2.t_cc1.a, planner__core__join_reorder2.t_cc2.a) +│ ├─TableReader(Build) root data:TableFullScan +│ │ └─TableFullScan cop[tikv] table:t_cc1 keep order:false +│ └─IndexReader(Probe) root index:IndexRangeScan +│ └─IndexRangeScan cop[tikv] table:t_cc2, index:idx_a(a) range: decided by [eq(planner__core__join_reorder2.t_cc2.a, planner__core__join_reorder2.t_cc1.a)], keep order:false +└─MergeJoin(Probe) root inner join, left key:planner__core__join_reorder2.t_cc3.a, right key:planner__core__join_reorder2.t_cc4.a + ├─IndexReader(Build) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_cc4, index:idx_a(a) keep order:true + └─IndexReader(Probe) root index:IndexFullScan + └─IndexFullScan cop[tikv] table:t_cc3, index:idx_a(a) keep order:true +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select * from +(t_cc1 join t_cc2 on t_cc1.a = t_cc2.a), +(t_cc3 join t_cc4 on t_cc3.a = t_cc4.a); +id task access object operator info +HashJoin root CARTESIAN inner join +├─IndexJoin(Build) root inner join, inner:IndexReader, outer key:planner__core__join_reorder2.t_cc1.a, inner key:planner__core__join_reorder2.t_cc2.a, equal cond:eq(planner__core__join_reorder2.t_cc1.a, planner__core__join_reorder2.t_cc2.a) +│ ├─TableReader(Build) root data:TableFullScan +│ │ └─TableFullScan cop[tikv] table:t_cc1 keep order:false +│ └─IndexReader(Probe) root index:IndexRangeScan +│ └─IndexRangeScan cop[tikv] table:t_cc2, index:idx_a(a) range: decided by [eq(planner__core__join_reorder2.t_cc2.a, planner__core__join_reorder2.t_cc1.a)], keep order:false +└─MergeJoin(Probe) root inner join, left key:planner__core__join_reorder2.t_cc3.a, right key:planner__core__join_reorder2.t_cc4.a + ├─IndexReader(Build) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_cc4, index:idx_a(a) keep order:true + └─IndexReader(Probe) root index:IndexFullScan + └─IndexFullScan cop[tikv] table:t_cc3, index:idx_a(a) keep order:true +set @@tidb_opt_enable_advanced_join_reorder = 0; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select * from +(t_cc1 join t_cc2 on t_cc1.a = t_cc2.a), +(t_cc3 join t_cc4 on t_cc3.a = t_cc4.a); +id task access object operator info +HashJoin root CARTESIAN inner join +├─IndexJoin(Build) root inner join, inner:IndexReader, outer key:planner__core__join_reorder2.t_cc1.a, inner key:planner__core__join_reorder2.t_cc2.a, equal cond:eq(planner__core__join_reorder2.t_cc1.a, planner__core__join_reorder2.t_cc2.a) +│ ├─TableReader(Build) root data:TableFullScan +│ │ └─TableFullScan cop[tikv] table:t_cc1 keep order:false +│ └─IndexReader(Probe) root index:IndexRangeScan +│ └─IndexRangeScan cop[tikv] table:t_cc2, index:idx_a(a) range: decided by [eq(planner__core__join_reorder2.t_cc2.a, planner__core__join_reorder2.t_cc1.a)], keep order:false +└─MergeJoin(Probe) root inner join, left key:planner__core__join_reorder2.t_cc3.a, right key:planner__core__join_reorder2.t_cc4.a + ├─IndexReader(Build) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_cc4, index:idx_a(a) keep order:true + └─IndexReader(Probe) root index:IndexFullScan + └─IndexFullScan cop[tikv] table:t_cc3, index:idx_a(a) keep order:true +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select * from +(t_cc1 join t_cc2 on t_cc1.a = t_cc2.a), +(t_cc3 join t_cc4 on t_cc3.a = t_cc4.a); +id task access object operator info +HashJoin root CARTESIAN inner join +├─IndexJoin(Build) root inner join, inner:IndexReader, outer key:planner__core__join_reorder2.t_cc1.a, inner key:planner__core__join_reorder2.t_cc2.a, equal cond:eq(planner__core__join_reorder2.t_cc1.a, planner__core__join_reorder2.t_cc2.a) +│ ├─TableReader(Build) root data:TableFullScan +│ │ └─TableFullScan cop[tikv] table:t_cc1 keep order:false +│ └─IndexReader(Probe) root index:IndexRangeScan +│ └─IndexRangeScan cop[tikv] table:t_cc2, index:idx_a(a) range: decided by [eq(planner__core__join_reorder2.t_cc2.a, planner__core__join_reorder2.t_cc1.a)], keep order:false +└─MergeJoin(Probe) root inner join, left key:planner__core__join_reorder2.t_cc3.a, right key:planner__core__join_reorder2.t_cc4.a + ├─IndexReader(Build) root index:IndexFullScan + │ └─IndexFullScan cop[tikv] table:t_cc4, index:idx_a(a) keep order:true + └─IndexReader(Probe) root index:IndexFullScan + └─IndexFullScan cop[tikv] table:t_cc3, index:idx_a(a) keep order:true +drop table if exists t_pen1, t_pen2, t_pen3; +create table t_pen1(a int, b int, c int); +create table t_pen2(a int, b int, c int); +create table t_pen3(a int, b int, c int); +insert into t_pen1 values (1, 1, 1); +insert into t_pen3 values (1, 1, 1); +analyze table t_pen1, t_pen2, t_pen3; +set @@tidb_opt_join_reorder_threshold = 2; +set @@tidb_opt_cartesian_join_order_threshold = 100; +set @@tidb_opt_greedy_join_seed_by_cost = 1; +set @@tidb_opt_enable_advanced_join_reorder = 0; +explain format = 'plan_tree' select * from t_pen1, t_pen2, t_pen3 where t_pen1.a = t_pen2.a and t_pen2.b = t_pen3.b; +id task access object operator info +HashJoin root inner join, equal:[eq(planner__core__join_reorder2.t_pen2.b, planner__core__join_reorder2.t_pen3.b)] +├─TableReader(Build) root data:Selection +│ └─Selection cop[tikv] not(isnull(planner__core__join_reorder2.t_pen3.b)) +│ └─TableFullScan cop[tikv] table:t_pen3 keep order:false +└─HashJoin(Probe) root inner join, equal:[eq(planner__core__join_reorder2.t_pen1.a, planner__core__join_reorder2.t_pen2.a)] + ├─TableReader(Build) root data:Selection + │ └─Selection cop[tikv] not(isnull(planner__core__join_reorder2.t_pen1.a)) + │ └─TableFullScan cop[tikv] table:t_pen1 keep order:false + └─TableReader(Probe) root data:Selection + └─Selection cop[tikv] not(isnull(planner__core__join_reorder2.t_pen2.a)), not(isnull(planner__core__join_reorder2.t_pen2.b)) + └─TableFullScan cop[tikv] table:t_pen2 keep order:false, stats:pseudo +set @@tidb_opt_enable_advanced_join_reorder = 1; +explain format = 'plan_tree' select * from t_pen1, t_pen2, t_pen3 where t_pen1.a = t_pen2.a and t_pen2.b = t_pen3.b; +id task access object operator info +HashJoin root inner join, equal:[eq(planner__core__join_reorder2.t_pen2.b, planner__core__join_reorder2.t_pen3.b)] +├─TableReader(Build) root data:Selection +│ └─Selection cop[tikv] not(isnull(planner__core__join_reorder2.t_pen3.b)) +│ └─TableFullScan cop[tikv] table:t_pen3 keep order:false +└─HashJoin(Probe) root inner join, equal:[eq(planner__core__join_reorder2.t_pen1.a, planner__core__join_reorder2.t_pen2.a)] + ├─TableReader(Build) root data:Selection + │ └─Selection cop[tikv] not(isnull(planner__core__join_reorder2.t_pen1.a)) + │ └─TableFullScan cop[tikv] table:t_pen1 keep order:false + └─TableReader(Probe) root data:Selection + └─Selection cop[tikv] not(isnull(planner__core__join_reorder2.t_pen2.a)), not(isnull(planner__core__join_reorder2.t_pen2.b)) + └─TableFullScan cop[tikv] table:t_pen2 keep order:false, stats:pseudo +set @@tidb_opt_join_reorder_threshold = 0; +set @@tidb_opt_cartesian_join_order_threshold = 0; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +drop table if exists t_probe_base, t_probe_eq, t_probe_cast; +create table t_probe_base(id int not null primary key, payload int not null); +create table t_probe_eq(sid varchar(20) not null, key idx_sid(sid)); +create table t_probe_cast(id int not null, key idx_id(id)); +insert into t_probe_base values (1, 1), (2, 2), (3, 3), (4, 4); +insert into t_probe_eq values ('1'); +insert into t_probe_cast values (1), (2), (3), (4), (5), (6), (7), (8); +analyze table t_probe_base, t_probe_eq, t_probe_cast; +set @@tidb_opt_join_reorder_threshold = 0; +set @@tidb_opt_enable_advanced_join_reorder = 0; +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'brief' select * from +(select cast(id as char) as sid, payload from t_probe_base) d +join t_probe_eq on d.sid = t_probe_eq.sid +join t_probe_cast on t_probe_cast.id = d.sid; +id estRows task access object operator info +Projection 0.80 root Column#4, planner__core__join_reorder2.t_probe_base.payload, planner__core__join_reorder2.t_probe_eq.sid, planner__core__join_reorder2.t_probe_cast.id +└─IndexJoin 0.80 root inner join, inner:Projection, outer key:Column#17, inner key:Column#8, equal cond:eq(Column#17, Column#8) + ├─Projection(Build) 0.80 root Column#4, planner__core__join_reorder2.t_probe_base.payload, planner__core__join_reorder2.t_probe_eq.sid, cast(Column#4, bigint BINARY)->Column#17 + │ └─Selection 0.80 root eq(cast(cast(Column#4, bigint BINARY), double BINARY), cast(Column#4, double BINARY)) + │ └─Projection 1.00 root Column#4, planner__core__join_reorder2.t_probe_base.payload, planner__core__join_reorder2.t_probe_eq.sid + │ └─HashJoin 1.00 root inner join, equal:[eq(planner__core__join_reorder2.t_probe_eq.sid, Column#4)] + │ ├─IndexReader(Build) 1.00 root index:IndexFullScan + │ │ └─IndexFullScan 1.00 cop[tikv] table:t_probe_eq, index:idx_sid(sid) keep order:false + │ └─Projection(Probe) 3.20 root cast(planner__core__join_reorder2.t_probe_base.id, var_string(11))->Column#4, planner__core__join_reorder2.t_probe_base.payload + │ └─TableReader 3.20 root data:Selection + │ └─Selection 3.20 cop[tikv] not(isnull(cast(planner__core__join_reorder2.t_probe_base.id, var_string(11)))) + │ └─TableFullScan 4.00 cop[tikv] table:t_probe_base keep order:false + └─Projection(Probe) 0.80 root planner__core__join_reorder2.t_probe_cast.id, planner__core__join_reorder2.t_probe_cast.id->Column#8 + └─IndexReader 0.80 root index:IndexRangeScan + └─IndexRangeScan 0.80 cop[tikv] table:t_probe_cast, index:idx_id(id) range: decided by [eq(planner__core__join_reorder2.t_probe_cast.id, Column#17)], keep order:false +set @@tidb_opt_enable_advanced_join_reorder = 1; +explain format = 'brief' select * from +(select cast(id as char) as sid, payload from t_probe_base) d +join t_probe_eq on d.sid = t_probe_eq.sid +join t_probe_cast on t_probe_cast.id = d.sid; +id estRows task access object operator info +Projection 1.00 root Column#4, planner__core__join_reorder2.t_probe_base.payload, planner__core__join_reorder2.t_probe_eq.sid, planner__core__join_reorder2.t_probe_cast.id +└─IndexJoin 1.00 root inner join, inner:Projection, outer key:Column#17, inner key:Column#8, equal cond:eq(Column#17, Column#8) + ├─Projection(Build) 1.00 root Column#4, planner__core__join_reorder2.t_probe_base.payload, planner__core__join_reorder2.t_probe_eq.sid, cast(Column#4, bigint BINARY)->Column#17 + │ └─Projection 1.00 root Column#4, planner__core__join_reorder2.t_probe_base.payload, planner__core__join_reorder2.t_probe_eq.sid + │ └─HashJoin 1.00 root inner join, equal:[eq(planner__core__join_reorder2.t_probe_eq.sid, Column#4)], other cond:eq(cast(cast(Column#4, bigint BINARY), double BINARY), cast(Column#4, double BINARY)) + │ ├─IndexReader(Build) 1.00 root index:IndexFullScan + │ │ └─IndexFullScan 1.00 cop[tikv] table:t_probe_eq, index:idx_sid(sid) keep order:false + │ └─Projection(Probe) 3.20 root cast(planner__core__join_reorder2.t_probe_base.id, var_string(11))->Column#4, planner__core__join_reorder2.t_probe_base.payload + │ └─TableReader 3.20 root data:Selection + │ └─Selection 3.20 cop[tikv] not(isnull(cast(planner__core__join_reorder2.t_probe_base.id, var_string(11)))) + │ └─TableFullScan 4.00 cop[tikv] table:t_probe_base keep order:false + └─Projection(Probe) 1.00 root planner__core__join_reorder2.t_probe_cast.id, planner__core__join_reorder2.t_probe_cast.id->Column#8 + └─IndexReader 1.00 root index:IndexRangeScan + └─IndexRangeScan 1.00 cop[tikv] table:t_probe_cast, index:idx_id(id) range: decided by [eq(planner__core__join_reorder2.t_probe_cast.id, Column#17)], keep order:false +set @@tidb_opt_greedy_join_seed_by_cost = 0; diff --git a/tests/integrationtest/t/planner/core/join_reorder2.test b/tests/integrationtest/t/planner/core/join_reorder2.test index 043c319bbe0bb..fd4ce8555e481 100644 --- a/tests/integrationtest/t/planner/core/join_reorder2.test +++ b/tests/integrationtest/t/planner/core/join_reorder2.test @@ -82,3 +82,154 @@ explain format = 'plan_tree' select /*+ leading(t1@sel_3, t5, t4@sel_2, t2@sel_3 inner join t4 on sub1.id=t4.id ) sub2 inner join t5 on sub2.id=t5.id; + +# Greedy join reorder can pick the smallest base table as the seed even when +# the cheapest first join should start from a different pair. +drop table if exists t_seed1, t_seed2, t_seed3; +create table t_seed1(id int not null primary key, col1 int not null, key idx_col1(col1)); +create table t_seed2(id int not null primary key, col2 int not null); +create table t_seed3(id int not null primary key, col1 int not null, key idx_col1(col1)); +insert into t_seed1 values + (1, 1), (2, 2), (3, 3), (4, 4), + (5, 5), (6, 6), (7, 7), (8, 8), + (9, 9), (10, 10), (11, 11), (12, 12), + (13, 13), (14, 14), (15, 15), (16, 16); +insert into t_seed2 values (1, 1); +insert into t_seed3 values (1, 1), (2, 2); +analyze table t_seed1, t_seed2, t_seed3; +set @@tidb_enable_outer_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select * from + (select t_seed1.id, t_seed1.col1 from t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2) tt + join t_seed3 on t_seed3.col1 = tt.col1; +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select * from + (select t_seed1.id, t_seed1.col1 from t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2) tt + join t_seed3 on t_seed3.col1 = tt.col1; +set @@tidb_opt_enable_advanced_join_reorder = 0; +explain format = 'plan_tree' select * from + (select t_seed1.id, t_seed1.col1 from t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2) tt + join t_seed3 on t_seed3.col1 = tt.col1; +set @@tidb_opt_enable_advanced_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; + +# LEADING should only lock the hinted component. A later disconnected component +# should still be allowed to reseed by cost in both greedy implementations. +drop table if exists t_lead1, t_lead2; +create table t_lead1(id int not null primary key, a int not null, key idx_a(a)); +create table t_lead2(id int not null primary key, a int not null, key idx_a(a)); +insert into t_lead1 values (1, 1), (2, 2); +insert into t_lead2 values (1, 1), (2, 2); +analyze table t_lead1, t_lead2; +set @@tidb_opt_join_reorder_threshold = 2; +set @@tidb_enable_outer_join_reorder = 1; +set @@tidb_opt_enable_advanced_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select /*+ leading(t_lead1, t_lead2) */ * from + (t_lead1 join t_lead2 on t_lead1.a = t_lead2.a), + (t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2 join t_seed3 on t_seed3.col1 = t_seed1.col1); +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select /*+ leading(t_lead1, t_lead2) */ * from + (t_lead1 join t_lead2 on t_lead1.a = t_lead2.a), + (t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2 join t_seed3 on t_seed3.col1 = t_seed1.col1); +set @@tidb_opt_enable_advanced_join_reorder = 0; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select /*+ leading(t_lead1, t_lead2) */ * from + (t_lead1 join t_lead2 on t_lead1.a = t_lead2.a), + (t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2 join t_seed3 on t_seed3.col1 = t_seed1.col1); +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select /*+ leading(t_lead1, t_lead2) */ * from + (t_lead1 join t_lead2 on t_lead1.a = t_lead2.a), + (t_seed1 left join t_seed2 on t_seed1.col1 = t_seed2.col2 join t_seed3 on t_seed3.col1 = t_seed1.col1); +set @@tidb_opt_enable_advanced_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; + +# Seed-by-cost should only change the first join inside the current connected component, +# not pull a cheaper seed pair from a later disconnected component. +drop table if exists t_cc1, t_cc2, t_cc3, t_cc4; +create table t_cc1(id int not null primary key, a int not null); +create table t_cc2(id int not null primary key, a int not null, key idx_a(a)); +create table t_cc3(id int not null primary key, a int not null, key idx_a(a)); +create table t_cc4(id int not null primary key, a int not null, key idx_a(a)); +insert into t_cc1 values (1, 1); +insert into t_cc2 values (1, 1); +insert into t_cc2 select id + 1, a + 1 from t_cc2; +insert into t_cc2 select id + 2, a + 2 from t_cc2; +insert into t_cc2 select id + 4, a + 4 from t_cc2; +insert into t_cc2 select id + 8, a + 8 from t_cc2; +insert into t_cc2 select id + 16, a + 16 from t_cc2; +insert into t_cc2 select id + 32, a + 32 from t_cc2; +insert into t_cc2 select id + 64, a + 64 from t_cc2; +insert into t_cc2 select id + 128, a + 128 from t_cc2; +insert into t_cc2 select id + 256, a + 256 from t_cc2; +insert into t_cc2 select id + 512, a + 512 from t_cc2; +delete from t_cc2 where id > 1000; +insert into t_cc3 values + (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), + (6, 6), (7, 7), (8, 8), (9, 9), (10, 10); +insert into t_cc4 select * from t_cc3; +analyze table t_cc1, t_cc2, t_cc3, t_cc4; +set @@tidb_opt_join_reorder_threshold = 2; +set @@tidb_opt_cartesian_join_order_threshold = 0; +set @@tidb_opt_enable_advanced_join_reorder = 1; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select * from + (t_cc1 join t_cc2 on t_cc1.a = t_cc2.a), + (t_cc3 join t_cc4 on t_cc3.a = t_cc4.a); +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select * from + (t_cc1 join t_cc2 on t_cc1.a = t_cc2.a), + (t_cc3 join t_cc4 on t_cc3.a = t_cc4.a); +set @@tidb_opt_enable_advanced_join_reorder = 0; +set @@tidb_opt_greedy_join_seed_by_cost = 0; +explain format = 'plan_tree' select * from + (t_cc1 join t_cc2 on t_cc1.a = t_cc2.a), + (t_cc3 join t_cc4 on t_cc3.a = t_cc4.a); +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'plan_tree' select * from + (t_cc1 join t_cc2 on t_cc1.a = t_cc2.a), + (t_cc3 join t_cc4 on t_cc3.a = t_cc4.a); + +# Seed-by-cost should not seed a connected component from a pure cartesian pair. +drop table if exists t_pen1, t_pen2, t_pen3; +create table t_pen1(a int, b int, c int); +create table t_pen2(a int, b int, c int); +create table t_pen3(a int, b int, c int); +insert into t_pen1 values (1, 1, 1); +insert into t_pen3 values (1, 1, 1); +analyze table t_pen1, t_pen2, t_pen3; +set @@tidb_opt_join_reorder_threshold = 2; +set @@tidb_opt_cartesian_join_order_threshold = 100; +set @@tidb_opt_greedy_join_seed_by_cost = 1; +set @@tidb_opt_enable_advanced_join_reorder = 0; +explain format = 'plan_tree' select * from t_pen1, t_pen2, t_pen3 where t_pen1.a = t_pen2.a and t_pen2.b = t_pen3.b; +set @@tidb_opt_enable_advanced_join_reorder = 1; +explain format = 'plan_tree' select * from t_pen1, t_pen2, t_pen3 where t_pen1.a = t_pen2.a and t_pen2.b = t_pen3.b; +set @@tidb_opt_join_reorder_threshold = 0; +set @@tidb_opt_cartesian_join_order_threshold = 0; +set @@tidb_opt_greedy_join_seed_by_cost = 0; + +# Seed-by-cost probing should work through a derived projection leaf that also +# participates in a casted join key. This keeps an end-to-end coverage point for +# the shape exercised by the side-effect regression tests. +drop table if exists t_probe_base, t_probe_eq, t_probe_cast; +create table t_probe_base(id int not null primary key, payload int not null); +create table t_probe_eq(sid varchar(20) not null, key idx_sid(sid)); +create table t_probe_cast(id int not null, key idx_id(id)); +insert into t_probe_base values (1, 1), (2, 2), (3, 3), (4, 4); +insert into t_probe_eq values ('1'); +insert into t_probe_cast values (1), (2), (3), (4), (5), (6), (7), (8); +analyze table t_probe_base, t_probe_eq, t_probe_cast; +set @@tidb_opt_join_reorder_threshold = 0; +set @@tidb_opt_enable_advanced_join_reorder = 0; +set @@tidb_opt_greedy_join_seed_by_cost = 1; +explain format = 'brief' select * from + (select cast(id as char) as sid, payload from t_probe_base) d + join t_probe_eq on d.sid = t_probe_eq.sid + join t_probe_cast on t_probe_cast.id = d.sid; +set @@tidb_opt_enable_advanced_join_reorder = 1; +explain format = 'brief' select * from + (select cast(id as char) as sid, payload from t_probe_base) d + join t_probe_eq on d.sid = t_probe_eq.sid + join t_probe_cast on t_probe_cast.id = d.sid; +set @@tidb_opt_greedy_join_seed_by_cost = 0;