From 598a386f1d1a1c61897e0f2631e9232aa468421b Mon Sep 17 00:00:00 2001 From: tpp <146148086+terry1purcell@users.noreply.github.com> Date: Wed, 13 May 2026 06:58:15 -0700 Subject: [PATCH] This is an automated cherry-pick of #65626 Signed-off-by: ti-chi-bot --- .gitignore | 24 + pkg/expression/BUILD.bazel | 6 + pkg/expression/builtin.go | 7 + pkg/expression/builtin_fts.go | 167 +++++++ .../builtin_threadunsafe_generated.go | 457 +++++++++++++++++ pkg/expression/distsql_builtin.go | 12 + pkg/expression/fts_to_like.go | 438 ++++++++++++++++ pkg/expression/fts_to_like_test.go | 340 +++++++++++++ pkg/expression/function_traits_test.go | 285 +++++++++++ pkg/expression/infer_pushdown.go | 17 + .../integration_test/integration_test.go | 203 ++++++++ pkg/parser/ast/functions.go | 7 + pkg/planner/cardinality/selectivity.go | 42 ++ pkg/planner/core/BUILD.bazel | 8 + pkg/planner/core/expression_rewriter.go | 380 ++++++++++++++ pkg/planner/core/fulltext_to_like.go | 76 +++ pkg/planner/core/fulltext_to_like_test.go | 134 +++++ pkg/planner/core/planbuilder.go | 46 ++ pkg/planner/optimize.go | 355 +++++++++++++ pkg/planner/util/null_misc_test.go | 467 ++++++++++++++++++ pkg/sessionctx/stmtctx/stmtctx.go | 115 +++++ tests/integrationtest/r/executor/show.result | 4 + .../r/planner/core/fulltext_search.result | 240 +++++++++ .../t/planner/core/fulltext_search.test | 321 ++++++++++++ 24 files changed, 4151 insertions(+) create mode 100644 pkg/expression/builtin_fts.go create mode 100644 pkg/expression/builtin_threadunsafe_generated.go create mode 100644 pkg/expression/fts_to_like.go create mode 100644 pkg/expression/fts_to_like_test.go create mode 100644 pkg/planner/core/fulltext_to_like.go create mode 100644 pkg/planner/core/fulltext_to_like_test.go create mode 100644 pkg/planner/util/null_misc_test.go create mode 100644 tests/integrationtest/r/planner/core/fulltext_search.result create mode 100644 tests/integrationtest/t/planner/core/fulltext_search.test diff --git a/.gitignore 
b/.gitignore index 7f622a5092a72..950a3c9966b4b 100644 --- a/.gitignore +++ b/.gitignore @@ -37,7 +37,31 @@ bazel-tidb MODULE.bazel.lock .ijwb/ /oom_record/ +<<<<<<< HEAD *.log.json genkeyword test_coverage coverage.dat +======= + +# Integration tests +tests/integrationtest/integration-test.out +tests/integrationtest/integrationtest_tidb-server +tests/integrationtest/s/ +tests/integrationtest/replayer/ + +# Local dev artifacts +bench_daily.json +compose-dev.yaml +fix.sql +export-20*/ +var + +# Personal config files +/*config.toml +.cache + +# Claude Code runtime state (per-user, not part of repo) +.claude/scheduled_tasks.lock +.claude/settings.local.json +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) diff --git a/pkg/expression/BUILD.bazel b/pkg/expression/BUILD.bazel index afcce9c4efbe5..72f80a887dea4 100644 --- a/pkg/expression/BUILD.bazel +++ b/pkg/expression/BUILD.bazel @@ -60,6 +60,11 @@ go_library( "expr_to_pb.go", "expression.go", "extension.go", +<<<<<<< HEAD +======= + "fts_helper.go", + "fts_to_like.go", +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) "function_traits.go", "grouping_sets.go", "helper.go", @@ -193,6 +198,7 @@ go_test( "evaluator_test.go", "expr_to_pb_test.go", "expression_test.go", + "fts_to_like_test.go", "function_traits_test.go", "grouping_sets_test.go", "helper_test.go", diff --git a/pkg/expression/builtin.go b/pkg/expression/builtin.go index bbe1f2ac0fc04..c9f516430e79d 100644 --- a/pkg/expression/builtin.go +++ b/pkg/expression/builtin.go @@ -969,6 +969,13 @@ var funcs = map[string]functionClass{ ast.VecFromText: &vecFromTextFunctionClass{baseFunctionClass{ast.VecFromText, 1, 1}}, ast.VecAsText: &vecAsTextFunctionClass{baseFunctionClass{ast.VecAsText, 1, 1}}, +<<<<<<< HEAD +======= + // fts functions + ast.FTSMatchWord: &ftsMatchWordFunctionClass{baseFunctionClass{ast.FTSMatchWord, 2, 2}}, + 
ast.FTSMysqlMatchAgainst: &ftsMysqlMatchAgainstFunctionClass{baseFunctionClass{ast.FTSMysqlMatchAgainst, 2, -1}}, + +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) // TiDB internal function. ast.TiDBDecodeKey: &tidbDecodeKeyFunctionClass{baseFunctionClass{ast.TiDBDecodeKey, 1, 1}}, ast.TiDBMVCCInfo: &tidbMVCCInfoFunctionClass{baseFunctionClass: baseFunctionClass{ast.TiDBMVCCInfo, 1, 1}}, diff --git a/pkg/expression/builtin_fts.go b/pkg/expression/builtin_fts.go new file mode 100644 index 0000000000000..430b0dabb371e --- /dev/null +++ b/pkg/expression/builtin_fts.go @@ -0,0 +1,167 @@ +// Copyright 2025 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package expression + +import ( + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/chunk" + "github.com/pingcap/tipb/go-tipb" +) + +var ( + _ functionClass = &ftsMatchWordFunctionClass{} + _ functionClass = &ftsMysqlMatchAgainstFunctionClass{} +) + +var ( + _ builtinFunc = &builtinFtsMatchWordSig{} + _ builtinFunc = &builtinFtsMysqlMatchAgainstSig{} +) + +type ftsMatchWordFunctionClass struct { + baseFunctionClass +} + +type builtinFtsMatchWordSig struct { + baseBuiltinFunc +} + +type ftsMysqlMatchAgainstFunctionClass struct { + baseFunctionClass +} + +type builtinFtsMysqlMatchAgainstSig struct { + baseBuiltinFunc + modifier ast.FulltextSearchModifier +} + +func (b *builtinFtsMatchWordSig) Clone() builtinFunc { + newSig := &builtinFtsMatchWordSig{} + newSig.cloneFrom(&b.baseBuiltinFunc) + return newSig +} + +func (b *builtinFtsMysqlMatchAgainstSig) Clone() builtinFunc { + newSig := &builtinFtsMysqlMatchAgainstSig{} + newSig.cloneFrom(&b.baseBuiltinFunc) + newSig.modifier = b.modifier + return newSig +} + +func (b *builtinFtsMysqlMatchAgainstSig) SetModifier(modifier ast.FulltextSearchModifier) { + b.modifier = modifier +} + +// SetFTSMysqlMatchAgainstModifier sets the modifier for the internal `MATCH ... AGAINST` builtin signature. +// It is expected to be called by planner right after building the scalar function. 
+func SetFTSMysqlMatchAgainstModifier(sf *ScalarFunction, modifier ast.FulltextSearchModifier) error { + sig, ok := sf.Function.(*builtinFtsMysqlMatchAgainstSig) + if !ok { + return errors.Errorf("unexpected builtin signature for %s: %T", ast.FTSMysqlMatchAgainst, sf.Function) + } + sig.SetModifier(modifier) + return nil +} + +func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { + if err := c.verifyArgs(args); err != nil { + return nil, err + } + + argAgainst := args[0] + argAgainstConstant, ok := argAgainst.(*Constant) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") + } + if argAgainstConstant.Value.Kind() != types.KindString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") + } + argsMatch := args[1:] + for _, arg := range argsMatch { + _, ok := arg.(*Column) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("not matching a column") + } + } + + argTps := make([]types.EvalType, 0, len(args)) + argTps = append(argTps, types.ETString, types.ETString) + + bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETReal, argTps...) + if err != nil { + return nil, err + } + + sig := &builtinFtsMatchWordSig{bf} + sig.setPbCode(tipb.ScalarFuncSig_FTSMatchWord) + return sig, nil +} + +func (b *builtinFtsMatchWordSig) evalReal(ctx EvalContext, row chunk.Row) (float64, bool, error) { + // Reject executing match against in TiDB side. 
+ return 0, false, errors.Errorf("cannot use 'FTS_MATCH_WORD()' outside of fulltext index") +} + +func (c *ftsMysqlMatchAgainstFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { + if err := c.verifyArgs(args); err != nil { + return nil, err + } + + argAgainst := args[0] + argAgainstConstant, ok := argAgainst.(*Constant) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") + } + if argAgainstConstant.Value.Kind() != types.KindString && !argAgainstConstant.Value.IsNull() { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-string constant") + } + + argsMatch := args[1:] + for _, arg := range argsMatch { + _, ok := arg.(*Column) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("not matching a column") + } + } + + argTps := make([]types.EvalType, 0, len(args)) + argTps = append(argTps, types.ETString) + for _, arg := range argsMatch { + if arg.GetType(ctx.GetEvalCtx()).EvalType() != types.ETString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") + } + argTps = append(argTps, types.ETString) + } + + bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETReal, argTps...) + if err != nil { + return nil, err + } + + sig := &builtinFtsMysqlMatchAgainstSig{baseBuiltinFunc: bf} + sig.setPbCode(tipb.ScalarFuncSig_FTSMatchExpression) + return sig, nil +} + +func (b *builtinFtsMysqlMatchAgainstSig) evalReal(ctx EvalContext, row chunk.Row) (float64, bool, error) { + // args[0] is validated to be a *Constant by getFunction; guard defensively + // since the sig may be reconstructed via the distsql path without that check. + if constArg, ok := b.args[0].(*Constant); ok && constArg.Value.IsNull() { + return 0, true, nil + } + return 0, false, errors.Errorf("cannot use 'MATCH ... 
AGAINST' outside of fulltext index") +} diff --git a/pkg/expression/builtin_threadunsafe_generated.go b/pkg/expression/builtin_threadunsafe_generated.go new file mode 100644 index 0000000000000..2140fab768df1 --- /dev/null +++ b/pkg/expression/builtin_threadunsafe_generated.go @@ -0,0 +1,457 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by go generate in expression/generator; DO NOT EDIT. + +package expression + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinArithmeticMultiplyRealSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinGreatestCmpStringAsTimeSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinGreatestTimeSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLeastCmpStringAsTimeSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLeastTimeSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
+func (s *builtinIntervalIntSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinIntervalRealSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinInternalFromBinarySig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinAesDecryptSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinAesDecryptIVSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinAesEncryptSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinAesEncryptIVSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinValidatePasswordStrengthSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinFtsMysqlMatchAgainstSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinIlikeSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinFoundRowsSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
+func (s *builtinCurrentUserSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinCurrentRoleSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinCurrentResourceGroupSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinUserSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinConnectionIDSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLastInsertIDSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLastInsertIDWithIDSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTiDBIsDDLOwnerSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinBenchmarkSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRowCountSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTiDBMVCCInfoSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTiDBEncodeRecordKeySig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
+func (s *builtinTiDBEncodeIndexKeySig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTiDBDecodeKeySig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTiDBDecodeSQLDigestsSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinNextValSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLastValSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinSetValSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinJSONSchemaValidSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLikeSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRandSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinSleepSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLockSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinReleaseLockSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
+func (s *builtinFreeLockSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinUsedLockSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinReleaseAllLocksSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinVectorFloat32IsTrueSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinVectorFloat32IsFalseSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinUnaryMinusDecimalSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinSetStringVarSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinSetRealVarSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinSetDecimalVarSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinSetIntVarSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinSetTimeVarSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinValuesIntSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
+func (s *builtinValuesRealSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinValuesDecimalSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinValuesStringSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinValuesTimeSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinValuesDurationSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinValuesJSONSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinValuesVectorFloat32Sig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRegexpLikeFuncSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRegexpSubstrFuncSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRegexpInStrFuncSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRegexpReplaceFuncSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
+func (s *builtinConcatSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinConcatWSSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRepeatSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinSpaceSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLpadSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinLpadUTF8Sig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRpadSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinRpadUTF8Sig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinFindInSetSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinFromBase64Sig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinToBase64Sig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinInsertSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
+func (s *builtinInsertUTF8Sig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinWeightStringSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinDateLiteralSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTimeLiteralSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinAddSubDateAsStringSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinAddSubDateDatetimeAnySig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinAddSubDateDurationAnySig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTimestamp1ArgSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTimestamp2ArgsSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTimestampLiteralSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinConvertTzSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
+func (s *builtinTiDBBoundedStalenessSig) SafeToShareAcrossSession() bool { + return false +} + +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinTiDBCurrentTsoSig) SafeToShareAcrossSession() bool { + return false +} diff --git a/pkg/expression/distsql_builtin.go b/pkg/expression/distsql_builtin.go index 9ef1cea6a0c55..a3c4658f1ffa6 100644 --- a/pkg/expression/distsql_builtin.go +++ b/pkg/expression/distsql_builtin.go @@ -1149,6 +1149,18 @@ func getSignatureByPB(ctx BuildContext, sigCode tipb.ScalarFuncSig, tp *tipb.Fie f = &builtinVecCosineDistanceSig{base} case tipb.ScalarFuncSig_VecL2NormSig: f = &builtinVecL2NormSig{base} +<<<<<<< HEAD +======= + case tipb.ScalarFuncSig_FTSMatchWord: + f = &builtinFtsMatchWordSig{base} + case tipb.ScalarFuncSig_FTSMatchExpression: + // NOTE: builtinFtsMysqlMatchAgainstSig.modifier is not serialized in the + // protobuf encoding because the tipb schema has no FTS metadata message. + // The reconstructed sig therefore uses the zero modifier value + // (FulltextSearchModifierNaturalLanguageMode). TiFlash must derive the + // search mode from other context when executing this expression. + f = &builtinFtsMysqlMatchAgainstSig{baseBuiltinFunc: base} +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) default: e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode) return nil, e diff --git a/pkg/expression/fts_to_like.go b/pkg/expression/fts_to_like.go new file mode 100644 index 0000000000000..19e46dbbbe075 --- /dev/null +++ b/pkg/expression/fts_to_like.go @@ -0,0 +1,438 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package expression + +import ( + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" +) + +// ftsSearchTerm represents a single token in a boolean-mode FTS search string +// surviving the strict-subset validator: a plain alphanumeric word optionally +// prefixed with `+` (required) or `-` (excluded). +type ftsSearchTerm struct { + word string + isRequired bool + isExcluded bool +} + +// parseFTSBooleanSearchString splits a boolean-mode search string into terms. +// Inputs reach this function only after ValidateFTSSearchStringForLikeFallback +// has accepted them, so every whitespace-separated field is either a bare +// alphanumeric word or `+word`/`-word`. +func parseFTSBooleanSearchString(text string) []ftsSearchTerm { + fields := strings.Fields(text) + if len(fields) == 0 { + return nil + } + terms := make([]ftsSearchTerm, 0, len(fields)) + for _, w := range fields { + terms = append(terms, parseFTSSearchTerm(w)) + } + return terms +} + +// parseFTSSearchTerm parses a single boolean-mode token. The strict-subset +// validator guarantees `word`, `+word`, or `-word` with an alphanumeric body, +// so only the leading operator needs interpretation. 
+func parseFTSSearchTerm(word string) ftsSearchTerm { + if word == "" { + return ftsSearchTerm{} + } + switch word[0] { + case '+': + return ftsSearchTerm{word: word[1:], isRequired: true} + case '-': + return ftsSearchTerm{word: word[1:], isExcluded: true} + } + return ftsSearchTerm{word: word} +} + +// isFTSWordByte returns true for alphanumeric ASCII and non-ASCII bytes. +// Punctuation including underscore is NOT a word character, consistent with +// MySQL's built-in FTS tokenizer which treats _ as a word separator. Used by +// ValidateFTSSearchStringForLikeFallback to gate the LIKE rewrite. +func isFTSWordByte(c byte) bool { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c > 127 +} + +// escapeFTSLikePattern escapes special LIKE characters (%, _, \) in the search term +// so they are treated as literal characters rather than wildcards. +func escapeFTSLikePattern(term string) string { + // Count special characters to pre-allocate the exact buffer size needed + escapeCount := 0 + for i := range len(term) { + ch := term[i] + if ch == '\\' || ch == '%' || ch == '_' { + escapeCount++ + } + } + + // Allocate exact size: original length + number of escape characters + var result strings.Builder + result.Grow(len(term) + escapeCount) + for i := range len(term) { + ch := term[i] + if ch == '\\' || ch == '%' || ch == '_' { + result.WriteByte('\\') + } + result.WriteByte(ch) + } + return result.String() +} + +// ValidateFTSSearchStringForLikeFallback reports whether searchText falls +// inside the strict subset that the LIKE fallback is allowed to translate. +// The supported subset is, by mode: +// +// - Boolean mode: each whitespace-separated token must be `word`, `+word`, +// or `-word`, where `word` consists of ASCII alphanumeric characters or +// non-ASCII UTF-8 bytes (the same definition used by isFTSWordByte). 
+// - Natural-language mode: each whitespace-separated token must be a `word` +// of the same alphanumeric form (no leading +/- operators). +// +// An empty or whitespace-only search string is valid; BuildFTSToILikeExpression +// short-circuits to a constant-0 result for it. +// +// Anything outside this subset (phrases, * prefix, > < ~ relevance modifiers, +// () grouping, mid-word punctuation like `xx-yy`, etc.) is rejected because +// MySQL FTS tokenizes those constructs in ways that differ from a substring +// LIKE match. The planner uses this signal to skip the LIKE fallback for +// rejected strings; the native FTSMysqlMatchAgainst builtin can still serve +// the query when an FTS index is available. +func ValidateFTSSearchStringForLikeFallback(searchText string, modifier ast.FulltextSearchModifier) error { + isBoolean := modifier.IsBooleanMode() + for _, token := range strings.Fields(searchText) { + body := token + // strings.Fields never returns an empty token (consecutive whitespace + // is collapsed), so body[0] is safe today. Keep the len(body) > 0 + // guard explicit so the indexing is obviously bounded and the check + // stays correct if the tokenization ever changes. + if isBoolean && len(body) > 0 && (body[0] == '+' || body[0] == '-') { + body = body[1:] + } + if body == "" { + return ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST search term '" + token + "' is not supported in the LIKE fallback") + } + for i := range len(body) { + if !isFTSWordByte(body[i]) { + return ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST search term '" + token + "' is not supported in the LIKE fallback") + } + } + } + return nil +} + +// BuildFTSToILikeExpression converts a MATCH...AGAINST input (a list of column +// expressions, the search-string literal, and the parsed modifier) into an +// equivalent ILIKE-based predicate expression. 
+// +// Two callers share this conversion: +// - the planner's MATCH...AGAINST LIKE fallback rewrite, used by the +// "fts-like-fallback" alternative round when round 1 reports that the +// native FTSMysqlMatchAgainst builtin cannot serve a predicate-context +// MATCH (no FTS index on a TiFlash replica, modifier not pushdown-supported); +// - selectivity estimation, which substitutes the same ILIKE form for the +// opaque FTSMysqlMatchAgainst builtin so round 1's cost is computed from +// column statistics rather than a flat default — the native builtin +// cannot be evaluated in TiDB and would otherwise fall through to a +// SelectivityFactor (0.8) that ignores the column's histogram. +// +// Returns an integer (0/1) typed expression suitable for direct use as a +// filter predicate. +// +// Semantic differences from MySQL's full-text search are documented in detail +// at the planner-level call site; this helper preserves those approximations +// so both callers see the same translated expression. +func BuildFTSToILikeExpression( + ctx BuildContext, + columns []Expression, + searchText string, + modifier ast.FulltextSearchModifier, +) (Expression, error) { + if len(columns) == 0 { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with no columns") + } + + // WITH QUERY EXPANSION requires a second FTS pass to find semantically related + // terms; LIKE cannot approximate this. Error explicitly rather than silently + // producing wrong results. + if modifier.WithQueryExpansion() { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback") + } + + // Reject search strings outside the strict supported subset before we + // translate. Callers that want a graceful fallback (e.g. the planner + // redirecting to the native builtin, or selectivity estimation falling + // through to a default estimate) should call this validator directly and + // react to its error. 
+ if err := ValidateFTSSearchStringForLikeFallback(searchText, modifier); err != nil { + return nil, err + } + + if searchText == "" { + return ftsZeroIntConst(), nil + } + + if modifier.IsBooleanMode() { + return buildFTSBooleanModeILikeExpression(ctx, columns, searchText) + } + if modifier.IsNaturalLanguageMode() { + return buildFTSNaturalLanguageModeILikeExpression(ctx, columns, searchText) + } + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST modifier is not supported in the LIKE fallback") +} + +// ftsZeroIntConst returns the constant-0 tiny-int expression used whenever +// the LIKE fallback can prove no row will match (empty search string, all +// terms tokenized away, or boolean-mode "only excluded" queries). +func ftsZeroIntConst() Expression { + return &Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + } +} + +// buildFTSBooleanModeILikeExpression handles `IN BOOLEAN MODE`. Required +// terms become an AND of per-term column-DNFs, excluded terms become NOT over +// per-term column-DNFs, and optional terms anchor the result only when no +// required terms exist (since LIKE cannot rank). +func buildFTSBooleanModeILikeExpression(ctx BuildContext, columns []Expression, searchText string) (Expression, error) { + terms := parseFTSBooleanSearchString(searchText) + if len(terms) == 0 { + return ftsZeroIntConst(), nil + } + + var required, excluded, optional []ftsSearchTerm + for _, term := range terms { + if term.word == "" { + continue + } + if term.isRequired { + required = append(required, term) + } else if term.isExcluded { + excluded = append(excluded, term) + } else { + optional = append(optional, term) + } + } + + // MySQL Boolean mode: a query with only excluded terms ("-a -b") returns + // an empty result set. The LIKE fallback must match this: when there are + // no required and no optional terms, no row can possibly satisfy the + // search, so return a constant FALSE immediately. 
+ if len(required) == 0 && len(optional) == 0 && len(excluded) > 0 { + return ftsZeroIntConst(), nil + } + + var allPredicates []Expression + + // For each required term: (col1 ILIKE %term% OR col2 ILIKE %term% ...) + for _, term := range required { + var termColumnPreds []Expression + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + termColumnPreds = append(termColumnPreds, pred) + } + if len(termColumnPreds) > 0 { + allPredicates = append(allPredicates, ComposeDNFCondition(ctx, termColumnPreds...)) + } + } + + // For each excluded term: NOT(col1 ILIKE %term% OR col2 ILIKE %term% ...) + for _, term := range excluded { + var termColumnPreds []Expression + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + termColumnPreds = append(termColumnPreds, pred) + } + if len(termColumnPreds) > 0 { + notPred, err := NewFunction(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), + ComposeDNFCondition(ctx, termColumnPreds...)) + if err != nil { + return nil, err + } + allPredicates = append(allPredicates, notPred) + } + } + + // For optional terms: since LIKE cannot rank, treat optionals as a + // positive filter when no required terms exist. 
+ // - required>0: ignore optionals (required terms already anchor the result) + // - required==0, excluded==0: at least one optional must match (pure optional query) + // - required==0, excluded>0: at least one optional must match AND excluded terms + // must be absent; AND the optional-DNF into allPredicates below + if len(optional) > 0 && len(required) == 0 { + var allOptionalPreds []Expression + for _, term := range optional { + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + allOptionalPreds = append(allOptionalPreds, pred) + } + } + if len(allOptionalPreds) > 0 { + optionalDNF := ComposeDNFCondition(ctx, allOptionalPreds...) + if len(excluded) == 0 { + return optionalDNF, nil + } + allPredicates = append(allPredicates, optionalDNF) + } + } + + if len(allPredicates) == 0 { + return ftsZeroIntConst(), nil + } + + return ComposeCNFCondition(ctx, allPredicates...), nil +} + +// buildFTSNaturalLanguageModeILikeExpression handles the default +// natural-language mode by splitting the search string into whitespace +// tokens and OR-ing per-column per-word ILIKE predicates together. 
+func buildFTSNaturalLanguageModeILikeExpression(ctx BuildContext, columns []Expression, searchText string) (Expression, error) { + words := strings.Fields(searchText) + if len(words) == 0 { + return ftsZeroIntConst(), nil + } + + var columnPredicates []Expression + for _, column := range columns { + var wordPredicates []Expression + for _, word := range words { + pred, err := buildFTSILikePredicate(ctx, column, word) + if err != nil { + return nil, err + } + wordPredicates = append(wordPredicates, pred) + } + if len(wordPredicates) > 0 { + columnPredicates = append(columnPredicates, ComposeDNFCondition(ctx, wordPredicates...)) + } + } + + if len(columnPredicates) == 0 { + return ftsZeroIntConst(), nil + } + + return ComposeDNFCondition(ctx, columnPredicates...), nil +} + +// BuildFTSToILikeExpressionFromBuiltin pulls the search string and modifier +// out of a MATCH...AGAINST scalar function (FTSMysqlMatchAgainst) and +// delegates to BuildFTSToILikeExpression. It is the entry point for +// selectivity estimation, where the FTS scalar function is opaque to the +// stats engine; substituting an equivalent ILIKE expression lets the engine +// reuse its TopN/histogram-based estimation paths instead of falling back +// to a flat default that ignores column statistics. +// +// Restricted to single-column MATCH: GetSelectivityByFilter only estimates +// expressions over a single column, so a multi-column substituted ILIKE would +// be declined by the stats engine and fall through to the same str-match +// default that the un-substituted FTS expression already receives. Returning +// an error for the multi-column case lets the selectivity caller's existing +// err-check fall through cleanly, without producing a substitute that would +// never improve the estimate. 
+func BuildFTSToILikeExpressionFromBuiltin(ctx BuildContext, fts *ScalarFunction) (Expression, error) { + if fts == nil || fts.FuncName.L != ast.FTSMysqlMatchAgainst { + return nil, errors.Errorf("expected %s, got %v", ast.FTSMysqlMatchAgainst, fts) + } + args := fts.GetArgs() + if len(args) < 2 { + return nil, errors.Errorf("%s expects at least 2 args, got %d", ast.FTSMysqlMatchAgainst, len(args)) + } + if len(args) > 2 { + return nil, ErrNotSupportedYet.GenWithStackByArgs("multi-column MATCH...AGAINST in selectivity substitution") + } + againstConst, ok := args[0].(*Constant) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") + } + if againstConst.Value.IsNull() { + // Match the planner-side matchAgainstToLike NULL fast-path: emit + // Constant(NULL) so the substitute preserves SQL three-valued logic + // even though selectivity estimation does not currently exploit the + // difference. Constant(0) here would, under any future cost path that + // composes NOT over the substitute, report "NOT 0 = TRUE → selectivity + // 1" — opposite of native MATCH(NULL) which returns NULL. + return &Constant{ + Value: types.Datum{}, + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + if againstConst.Value.Kind() != types.KindString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-string search constant") + } + sig, ok := fts.Function.(*builtinFtsMysqlMatchAgainstSig) + if !ok { + return nil, errors.Errorf("unexpected builtin signature for %s: %T", ast.FTSMysqlMatchAgainst, fts.Function) + } + return BuildFTSToILikeExpression(ctx, args[1:], againstConst.Value.GetString(), sig.modifier) +} + +// buildFTSILikePredicate builds a single ILIKE predicate for a column and search term, +// wrapped in IFNULL so that NULL columns are treated as not containing the term. 
+func buildFTSILikePredicate(ctx BuildContext, column Expression, term string) (Expression, error) { + escapedTerm := escapeFTSLikePattern(term) + + // NOTE: Prefix matching (word*) in MySQL full-text search matches words that START with + // the prefix, but the word can appear anywhere in the text. Using LIKE without REGEXP, + // we cannot perfectly enforce word-start boundaries. We use %term% which may produce + // false positives but avoids false negatives. + pattern := "%" + escapedTerm + "%" + + patternConst := &Constant{ + Value: types.NewStringDatum(pattern), + RetType: types.NewFieldType(mysql.TypeVarchar), + } + + // Backslash escape character (=92) for ILIKE. + escapeConst := &Constant{ + Value: types.NewIntDatum(92), + RetType: types.NewFieldType(mysql.TypeTiny), + } + + // MySQL full-text search is always case-insensitive regardless of column + // collation, so ILIKE matches that semantic rather than plain LIKE which + // would follow the column's collation. + likeFunc, err := NewFunction(ctx, ast.Ilike, types.NewFieldType(mysql.TypeTiny), column, patternConst, escapeConst) + if err != nil { + return nil, err + } + + // Wrap with IFNULL so a NULL column is treated as not containing the term + // (consistent with MySQL FTS semantics where NULL columns are ignored). + // Without this, NOT(NULL ILIKE %term%) = NOT(NULL) = NULL which incorrectly + // filters rows that have a NULL column and don't contain the excluded term. + zeroConst := &Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + } + return NewFunction(ctx, ast.Ifnull, types.NewFieldType(mysql.TypeTiny), likeFunc, zeroConst) +} diff --git a/pkg/expression/fts_to_like_test.go b/pkg/expression/fts_to_like_test.go new file mode 100644 index 0000000000000..4f0581698c0a7 --- /dev/null +++ b/pkg/expression/fts_to_like_test.go @@ -0,0 +1,340 @@ +// Copyright 2026 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package expression + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/mock" + "github.com/stretchr/testify/require" +) + +func TestValidateFTSSearchStringForLikeFallback(t *testing.T) { + naturalMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode) + booleanMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode) + + tests := []struct { + name string + text string + modifier ast.FulltextSearchModifier + wantErr bool + }{ + // Natural-language mode: plain alphanumeric words only. 
+ {name: "natural empty", text: "", modifier: naturalMode, wantErr: false}, + {name: "natural whitespace only", text: " \t\n ", modifier: naturalMode, wantErr: false}, + {name: "natural single word", text: "MySQL", modifier: naturalMode, wantErr: false}, + {name: "natural multi word", text: "MySQL tutorial PostgreSQL", modifier: naturalMode, wantErr: false}, + {name: "natural alphanumeric mix", text: "abc123 mysql8", modifier: naturalMode, wantErr: false}, + {name: "natural rejects mid-word dash", text: "x-x", modifier: naturalMode, wantErr: true}, + {name: "natural rejects punctuation suffix", text: "MySQL,", modifier: naturalMode, wantErr: true}, + {name: "natural rejects + operator", text: "+word", modifier: naturalMode, wantErr: true}, + {name: "natural rejects - operator", text: "-word", modifier: naturalMode, wantErr: true}, + {name: "natural rejects quote", text: `"phrase"`, modifier: naturalMode, wantErr: true}, + {name: "natural rejects wildcard", text: "word*", modifier: naturalMode, wantErr: true}, + {name: "natural rejects percent", text: "100%", modifier: naturalMode, wantErr: true}, + {name: "natural rejects underscore", text: "test_file", modifier: naturalMode, wantErr: true}, + + // Boolean mode: plain word, +word, -word with alphanumeric body only. 
+ {name: "boolean empty", text: "", modifier: booleanMode, wantErr: false}, + {name: "boolean plain word", text: "MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean required word", text: "+MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean excluded word", text: "-MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean mix", text: "+apple -cherry pie", modifier: booleanMode, wantErr: false}, + {name: "boolean rejects mid-word dash", text: "xx-yy", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects bare operator", text: "+", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects bare minus", text: "-", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects + after body", text: "x+y", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects wildcard", text: "word*", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects required wildcard", text: "+word*", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects relevance gt", text: ">word", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects relevance lt", text: "<word", modifier: booleanMode, wantErr: true}, + + // Non-ASCII UTF-8 bytes are accepted as word bytes (the c > 127 case). + {name: "natural utf8 word", text: "你好", modifier: naturalMode, wantErr: false}, + {name: "boolean utf8 word", text: "+你好", modifier: booleanMode, wantErr: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateFTSSearchStringForLikeFallback(tt.text, tt.modifier) + if tt.wantErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} + +// TestParseFTSBooleanSearchString covers the strict-subset inputs the boolean +// parser is expected to handle in production. Inputs outside the subset +// (phrases, wildcards, relevance modifiers, mid-word punctuation, etc.) are +// rejected upstream by ValidateFTSSearchStringForLikeFallback and therefore +// never reach this parser.
+func TestParseFTSBooleanSearchString(t *testing.T) { + tests := []struct { + input string + expected []ftsSearchTerm + }{ + { + input: "+apple +pie", + expected: []ftsSearchTerm{ + {word: "apple", isRequired: true}, + {word: "pie", isRequired: true}, + }, + }, + { + input: "+apple -cherry", + expected: []ftsSearchTerm{ + {word: "apple", isRequired: true}, + {word: "cherry", isExcluded: true}, + }, + }, + { + input: "word1 word2 word3", + expected: []ftsSearchTerm{ + {word: "word1"}, + {word: "word2"}, + {word: "word3"}, + }, + }, + { + input: "word1\t\nword2", + expected: []ftsSearchTerm{ + {word: "word1"}, + {word: "word2"}, + }, + }, + { + input: "", + expected: nil, + }, + { + input: " \t\n ", + expected: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := parseFTSBooleanSearchString(tt.input) + require.Equal(t, len(tt.expected), len(result), "Number of terms should match") + for i, expected := range tt.expected { + require.Equal(t, expected.word, result[i].word, "Word should match") + require.Equal(t, expected.isRequired, result[i].isRequired, "isRequired should match") + require.Equal(t, expected.isExcluded, result[i].isExcluded, "isExcluded should match") + } + }) + } +} + +func TestParseFTSSearchTerm(t *testing.T) { + tests := []struct { + input string + expected ftsSearchTerm + }{ + {input: "+word", expected: ftsSearchTerm{word: "word", isRequired: true}}, + {input: "-word", expected: ftsSearchTerm{word: "word", isExcluded: true}}, + {input: "word", expected: ftsSearchTerm{word: "word"}}, + {input: "", expected: ftsSearchTerm{}}, + // Bare operator with no body (caller passes the result through; the + // upstream validator rejects this case before the parser sees it). 
+ {input: "+", expected: ftsSearchTerm{word: "", isRequired: true}}, + {input: "-", expected: ftsSearchTerm{word: "", isExcluded: true}}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := parseFTSSearchTerm(tt.input) + require.Equal(t, tt.expected.word, result.word, "Word should match") + require.Equal(t, tt.expected.isRequired, result.isRequired, "isRequired should match") + require.Equal(t, tt.expected.isExcluded, result.isExcluded, "isExcluded should match") + }) + } +} + +func TestEscapeFTSLikePattern(t *testing.T) { + tests := []struct { + input string + expected string + }{ + { + input: "normal text", + expected: "normal text", + }, + { + input: "100%", + expected: "100\\%", + }, + { + input: "test_file", + expected: "test\\_file", + }, + { + input: "path\\to\\file", + expected: "path\\\\to\\\\file", + }, + { + input: "mix_%_all", + expected: "mix\\_\\%\\_all", + }, + { + input: "\\%_", + expected: "\\\\\\%\\_", + }, + { + input: "", + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := escapeFTSLikePattern(tt.input) + require.Equal(t, tt.expected, result, "Escaped pattern should match") + }) + } +} + +// newFTSMatchAgainstForTest builds a real FTSMysqlMatchAgainst ScalarFunction +// suitable for exercising BuildFTSToILikeExpressionFromBuiltin. It mirrors +// the planner's matchAgainstToBuiltin flow: build via NewFunction with a +// string Constant for AGAINST and one or more string Columns for MATCH, +// then attach the modifier via SetFTSMysqlMatchAgainstModifier. 
+func newFTSMatchAgainstForTest(t *testing.T, ctx BuildContext, search string, numCols int, modifier ast.FulltextSearchModifier) *ScalarFunction { + t.Helper() + stringTp := types.NewFieldType(mysql.TypeVarchar) + stringTp.SetCollate(mysql.DefaultCollationName) + args := make([]Expression, 0, 1+numCols) + args = append(args, &Constant{Value: types.NewStringDatum(search), RetType: stringTp}) + for i := range numCols { + args = append(args, &Column{Index: i, RetType: stringTp}) + } + fn, err := NewFunction(ctx, ast.FTSMysqlMatchAgainst, types.NewFieldType(mysql.TypeDouble), args...) + require.NoError(t, err) + sf, ok := fn.(*ScalarFunction) + require.True(t, ok) + require.NoError(t, SetFTSMysqlMatchAgainstModifier(sf, modifier)) + return sf +} + +func TestBuildFTSToILikeExpressionFromBuiltin(t *testing.T) { + ctx := mock.NewContext() + naturalMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode) + + t.Run("nil scalar function", func(t *testing.T) { + _, err := BuildFTSToILikeExpressionFromBuiltin(ctx, nil) + require.Error(t, err) + }) + + t.Run("wrong function name", func(t *testing.T) { + // Construct a non-FTS ScalarFunction by reusing one we know exists. + stringTp := types.NewFieldType(mysql.TypeVarchar) + col := &Column{Index: 0, RetType: stringTp} + other, err := NewFunction(ctx, ast.Length, types.NewFieldType(mysql.TypeLonglong), col) + require.NoError(t, err) + _, err = BuildFTSToILikeExpressionFromBuiltin(ctx, other.(*ScalarFunction)) + require.Error(t, err) + require.Contains(t, err.Error(), ast.FTSMysqlMatchAgainst) + }) + + t.Run("single-column natural-language succeeds", func(t *testing.T) { + sf := newFTSMatchAgainstForTest(t, ctx, "mysql", 1, naturalMode) + expr, err := BuildFTSToILikeExpressionFromBuiltin(ctx, sf) + require.NoError(t, err) + require.NotNil(t, expr) + // The result should be a scalar function (IFNULL(ILIKE,...)) — not the + // untranslated FTS opaque builtin. 
+ resultSF, ok := expr.(*ScalarFunction) + require.True(t, ok) + require.NotEqual(t, ast.FTSMysqlMatchAgainst, resultSF.FuncName.L) + }) + + t.Run("multi-column rejected for selectivity substitution", func(t *testing.T) { + // GetSelectivityByFilter declines expressions over more than one column, + // so a multi-column substituted ILIKE would never improve the estimate. + // BuildFTSToILikeExpressionFromBuiltin returns an error to keep that + // path explicit; the selectivity caller's err-check then falls through + // to the str-match default cleanly. + sf := newFTSMatchAgainstForTest(t, ctx, "mysql", 2, naturalMode) + _, err := BuildFTSToILikeExpressionFromBuiltin(ctx, sf) + require.Error(t, err) + require.Contains(t, err.Error(), "multi-column") + }) + + t.Run("NULL search constant returns Constant(NULL)", func(t *testing.T) { + // The builtin's getFunction allows NULL search constants explicitly + // (builtin_fts.go:129); the substitution short-circuits to Constant(NULL) + // rather than Constant(0) so it composes correctly under SQL three-valued + // logic and matches the planner-side matchAgainstToLike NULL fast-path. + stringTp := types.NewFieldType(mysql.TypeVarchar) + nullArg := &Constant{Value: types.NewDatum(nil), RetType: stringTp} + col := &Column{Index: 0, RetType: stringTp} + fn, err := NewFunction(ctx, ast.FTSMysqlMatchAgainst, types.NewFieldType(mysql.TypeDouble), nullArg, col) + require.NoError(t, err) + sf := fn.(*ScalarFunction) + require.NoError(t, SetFTSMysqlMatchAgainstModifier(sf, naturalMode)) + + expr, err := BuildFTSToILikeExpressionFromBuiltin(ctx, sf) + require.NoError(t, err) + c, ok := expr.(*Constant) + require.True(t, ok) + require.True(t, c.Value.IsNull(), "expected Constant(NULL), got %v", c.Value) + }) + + t.Run("search string outside strict subset rejected", func(t *testing.T) { + // Search string with mid-word `-` fails ValidateFTSSearchStringForLikeFallback + // and propagates that rejection through BuildFTSToILikeExpression. 
+ sf := newFTSMatchAgainstForTest(t, ctx, "xx-yy", 1, naturalMode) + _, err := BuildFTSToILikeExpressionFromBuiltin(ctx, sf) + require.Error(t, err) + }) +} + +func TestScalarExprSupportedByFlashRejectsNonDefaultFTSModifier(t *testing.T) { + // The tipb pushdown protocol does not serialize the FTS modifier; TiFlash + // reconstructs the signature with the default (natural-language) modifier. + // scalarExprSupportedByFlash must therefore mark non-default-modifier + // FTSMysqlMatchAgainst as NOT Flash-supported even though the function + // name is generally Flash-pushdown-eligible. This is defense in depth on + // top of the planner's modifier guard in matchAgainstToBuiltin. + ctx := mock.NewContext() + naturalMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode) + booleanMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode) + queryExpansion := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode | ast.FulltextSearchModifierWithQueryExpansion) + + cases := []struct { + name string + modifier ast.FulltextSearchModifier + want bool + }{ + {"natural-language mode is Flash-supported", naturalMode, true}, + {"boolean mode is not Flash-supported", booleanMode, false}, + {"with-query-expansion is not Flash-supported", queryExpansion, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + sf := newFTSMatchAgainstForTest(t, ctx, "mysql", 1, tc.modifier) + require.Equal(t, tc.want, scalarExprSupportedByFlash(ctx.GetEvalCtx(), sf)) + }) + } +} diff --git a/pkg/expression/function_traits_test.go b/pkg/expression/function_traits_test.go index 18f50927d499e..0317d2cf0ca91 100644 --- a/pkg/expression/function_traits_test.go +++ b/pkg/expression/function_traits_test.go @@ -25,3 +25,288 @@ func TestUnfoldableFuncs(t *testing.T) { _, ok := unFoldableFunctions[ast.Sysdate] require.True(t, ok) } +<<<<<<< HEAD +======= + +// CREATE TABLE (..., ... 
AS (func(...))) is using IllegalFunctions4GeneratedColumns +// as the list of functions that are illegal. This is basically a blocklist. +// +// This test keeps knownGood as the list of functions that should not be on this blocklist. +// This ensures that for new functions a conscious decision is made to allow or not allow +// the use in generated columns. +// +// Functions should only be allowed if they are: +// - Deterministic (this excludes RAND(), UUID(), CURRENT_TIMESTAMP(), etc) +// - Not dependent on session or global state (this excludes CONNECTION_ID(), CURRENT_USER(), etc) +// - Free of system interactions (this excludes GET_LOCK(), RELEASE_LOCK(), SLEEP(), etc) +func TestIllegalFunctions4GeneratedColumns(t *testing.T) { + builtin := GetBuiltinList() + legal := make([]string, 0) // Not on illegal list + knownGood := []string{ + "abs", + "acos", + "adddate", + "addtime", + "aes_decrypt", + "aes_encrypt", + "and", // operator + "any_value", + "ascii", + "asin", + "atan", + "atan2", + "bin", + "bin_to_uuid", + "bit_count", + "bit_length", + "bitand", // bit_and results in: ERROR 1111 (HY000): Invalid use of group function + "bitneg", // Not allowed. Maybe in known as bit_neg and bitneg + "bitor", // Not allowed. + "bitxor", // Not allowed.
+ "case", // OK: (case when 1=1 then 2 else 3 end) + "ceil", + "ceiling", + "char_func", + "char_length", + "character_length", + "charset", + "coalesce", + "coercibility", + "collation", + "compress", + "concat", + "concat_ws", + "conv", + "convert", + "convert_tz", // Allowed by MySQL and TiDB: (convert_tz('00:00:00', 'SYSTEM', 'Europe/Amsterdam')) + "cos", + "cot", + "crc32", + "date", + "date_add", + "date_format", + "date_sub", + "datediff", + "day", + "dayname", + "dayofmonth", + "dayofweek", + "dayofyear", + "decode", + "default_func", + "degrees", + "div", + "elt", + "encode", + "eq", + "exp", + "export_set", + "extract", + "field", + "find_in_set", + "floor", + "format", + "format_bytes", + "format_nano_time", + "from_base64", + "from_days", + "from_unixtime", + "fts_match_word", + "ge", + "get_format", + "getparam", + "greatest", + "grouping", + "gt", + "hex", + "hour", + "if", + "ifnull", + "ilike", + "in", + "inet6_aton", + "inet6_ntoa", + "inet_aton", + "inet_ntoa", + "insert_func", + "instr", + "intdiv", + "interval", + "is_ipv4", + "is_ipv4_compat", + "is_ipv4_mapped", + "is_ipv6", + "is_uuid", + "isfalse", + "isnull", + "istrue", + "json_array", + "json_array_append", + "json_array_insert", + "json_contains", + "json_contains_path", + "json_depth", + "json_extract", + "json_insert", + "json_keys", + "json_length", + "json_memberof", + "json_merge_patch", + "json_merge_preserve", + "json_object", + "json_overlaps", + "json_pretty", + "json_quote", + "json_remove", + "json_replace", + "json_schema_valid", + "json_search", + "json_set", + "json_storage_free", + "json_storage_size", + "json_type", + "json_unquote", + "json_valid", + "last_day", + "lastval", + "lcase", + "le", + "least", + "left", + "leftshift", + "length", + "like", + "ln", + "locate", + "log", + "log10", + "log2", + "lower", + "lpad", + "lt", + "ltrim", + "make_set", + "makedate", + "maketime", + "match_against", + "md5", + "microsecond", + "mid", + "minus", + "minute", + "mod", + 
"month", + "monthname", + "mul", + "ne", + "nextval", + "not", + "nulleq", + "oct", + "octet_length", + "or", + "ord", + "password", + "period_add", + "period_diff", + "pi", + "plus", + "position", + "pow", + "power", + "quarter", + "quote", + "radians", + "regexp", + "regexp_instr", + "regexp_like", + "regexp_replace", + "regexp_substr", + "repeat", + "replace", + "reverse", + "right", + "rightshift", + "round", + "rpad", + "rtrim", + "sec_to_time", + "second", + "setval", + "sha", + "sha1", + "sha2", + "sign", + "sin", + "sm3", // TiDB specific? + "space", + "sqrt", + "str_to_date", + "strcmp", + "subdate", + "substr", + "substring", + "substring_index", + "subtime", + "tan", + "tidb_decode_binary_plan", + "tidb_decode_key", + "tidb_decode_plan", + "tidb_decode_sql_digests", + "tidb_encode_index_key", + "tidb_encode_record_key", + "tidb_encode_sql_digest", + "tidb_mvcc_info", + "tidb_parse_tso", + "tidb_parse_tso_logical", + "tidb_shard", + "time", + "time_format", + "time_to_sec", + "timediff", + "timestamp", + "timestampadd", + "timestampdiff", + "to_base64", + "to_days", + "to_seconds", + "translate", + "trim", + "truncate", + "ucase", + "unaryminus", + "uncompress", + "uncompressed_length", + "unhex", + "upper", + "uuid_timestamp", + "uuid_to_bin", + "uuid_version", + "validate_password_strength", + "vec_as_text", + "vec_cosine_distance", + "vec_dims", + "vec_from_text", + "vec_l1_distance", + "vec_l2_distance", + "vec_l2_norm", + "vec_negative_inner_product", + "vitess_hash", // TiDB specific + "week", + "weekday", + "weekofyear", + "weight_string", + "xor", + "year", + "yearweek", + } + + for _, fname := range builtin { + _, ok := IllegalFunctions4GeneratedColumns[fname] + if !ok { + legal = append(legal, fname) + } + } + require.Equal(t, knownGood, legal) +} +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) diff --git a/pkg/expression/infer_pushdown.go b/pkg/expression/infer_pushdown.go index 
cad90178ab293..686adaa501502 100644 --- a/pkg/expression/infer_pushdown.go +++ b/pkg/expression/infer_pushdown.go @@ -413,6 +413,23 @@ func scalarExprSupportedByFlash(ctx EvalContext, function *ScalarFunction) bool return true case ast.VecDims, ast.VecL1Distance, ast.VecL2Distance, ast.VecNegativeInnerProduct, ast.VecCosineDistance, ast.VecL2Norm, ast.VecAsText: return true +<<<<<<< HEAD +======= + case ast.FTSMatchWord: + return true + case ast.FTSMysqlMatchAgainst: + // The tipb pushdown protocol (see distsql_builtin.go) does not + // serialize the FTS modifier; TiFlash defaults to natural-language + // mode on the reconstructed signature. Pushing a Boolean-mode or + // WITH QUERY EXPANSION call down would therefore silently execute + // with the modifier dropped. Mark such calls as not Flash-supported + // here as a defense in depth — the planner's modifier guard in + // matchAgainstToBuiltin already rejects them at plan time, but + // keeping pushdown self-consistent guards against any future code + // path that builds an FTSMysqlMatchAgainst around the planner. + sig, ok := function.Function.(*builtinFtsMysqlMatchAgainstSig) + return ok && !sig.modifier.IsBooleanMode() && !sig.modifier.WithQueryExpansion() +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) case ast.Grouping: // grouping function for grouping sets identification. return true } diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index a5033375f210d..6b8d67daccd36 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -62,6 +62,209 @@ import ( "github.com/tikv/client-go/v2/oracle" ) +<<<<<<< HEAD +======= +// The following tests will be brought back when optimizer part is ready. 
+// +// func TestFTSUnsupportedCases(t *testing.T) { +// store := testkit.CreateMockStoreWithSchemaLease(t, 1*time.Second, mockstore.WithMockTiFlash(2)) +// tk := testkit.NewTestKit(t, store) +// tk.MustExec("use test") + +// tiflash := infosync.NewMockTiFlash() +// infosync.SetMockTiFlash(tiflash) +// defer func() { +// tiflash.Lock() +// tiflash.StatusServer.Close() +// tiflash.Unlock() +// }() + +// failpoint.Enable("github.com/pingcap/tidb/pkg/ddl/MockCheckColumnarIndexProcess", `return(1)`) +// defer func() { +// require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/ddl/MockCheckColumnarIndexProcess")) +// }() + +// tk.MustExec("create table t(title TEXT, body TEXT)") +// tk.MustExec("insert into t values ('title 1', 'hello world'), ('title 2', 'hello TiDB')") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title)", "Full text search can only be used with a matching fulltext index") +// tk.MustExec("drop table t") + +// tk.MustExec(`create table t( +// id INT, title TEXT, body TEXT, +// FULLTEXT KEY (title) +// )`) +// tbl, _ := domain.GetDomain(tk.Session()).InfoSchema().TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t")) +// tbl.Meta().TiFlashReplica = &model.TiFlashReplicaInfo{ +// Count: 1, +// Available: true, +// } + +// tk.MustContainErrMsg("explain select fts_match_word('hello', title) from t", "Currently 'FTS_MATCH_WORD()' cannot be used in SELECT fields") +// tk.MustContainErrMsg("explain select fts_match_word('hello', title) from t where fts_match_word('hello', title)", "Currently 'FTS_MATCH_WORD()' cannot be used in SELECT fields") + +// tk.MustQuery("explain select * from t where fts_match_word('hello', title)") +// tk.MustQuery("explain select * from t where fts_match_word('hello', title) AND id > 10") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', body)", "Full text search can only be used with a matching fulltext index") +// 
tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', body) OR id > 10", "Currently 'FTS_MATCH_WORD()' must be used alone") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title) OR id > 10", "Currently 'FTS_MATCH_WORD()' must be used alone") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title) > 0", "Currently 'FTS_MATCH_WORD()' must be used alone") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title) AND fts_match_word('hello body', title)", "Currently 'FTS_MATCH_WORD()' must be used alone") + +// tk.MustContainErrMsg("explain select * from t order by fts_match_word('hello', title) limit 10", "It must be used with a WHERE clause and must be used alone") +// tk.MustContainErrMsg("explain select * from t order by fts_match_word('hello', title)", "Currently 'FTS_MATCH_WORD()' in ORDER BY without a LIMIT clause is not supported") +// tk.MustContainErrMsg("explain select * from t order by 1, fts_match_word('hello', title) limit 5", "FTS_MATCH_WORD() must be used as the first item in ORDER BY") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title) order by fts_match_word('hello', title)", "Currently 'FTS_MATCH_WORD()' in ORDER BY without a LIMIT clause is not supported") +// tk.MustQuery("explain select * from t where fts_match_word('hello', title) order by fts_match_word('hello', title) limit 10") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title) order by fts_match_word('hello world', title) limit 10", "'FTS_MATCH_WORD()' in ORDER BY must match the one in WHERE") + +// tk.MustExec("set @@tidb_isolation_read_engines='tidb,tiflash'") +// tk.MustQuery("explain select * from t where fts_match_word('hello', title)") + +// tk.MustExec("set @@tidb_isolation_read_engines='tidb,tikv'") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title)", "Full text 
search can be only executed in a columnar storage") + +// tk.MustExec("alter table t set tiflash replica 0") +// tk.MustExec("set @@tidb_isolation_read_engines='tidb,tikv'") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title)", "Full text search can be only executed in a columnar storage") +// tk.MustExec("set @@tidb_isolation_read_engines='tidb,tikv,tiflash'") +// tk.MustContainErrMsg("explain select * from t where fts_match_word('hello', title)", "Full text search can be only executed in a columnar storage") +// } + +func TestFTSParser(t *testing.T) { + store := testkit.CreateMockStoreWithSchemaLease(t, 1*time.Second, mockstore.WithMockTiFlash(2)) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tiflash := infosync.NewMockTiFlash() + infosync.SetMockTiFlash(tiflash) + defer func() { + tiflash.Lock() + tiflash.StatusServer.Close() + tiflash.Unlock() + }() + + failpoint.Enable("github.com/pingcap/tidb/pkg/ddl/MockCheckColumnarIndexProcess", `return(1)`) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/ddl/MockCheckColumnarIndexProcess")) + }() + + tk.MustExec("create table tx (a TEXT, FULLTEXT (a))") + tk.MustQuery("show create table tx").Check(testkit.Rows( + "tx CREATE TABLE `tx` (\n" + + " `a` text DEFAULT NULL,\n" + + " FULLTEXT INDEX `a`(`a`) WITH PARSER STANDARD\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin", + )) + tk.MustExec("drop table tx") + + tk.MustExec("create table tx (a TEXT, FULLTEXT (a) WITH PARSER standard)") + tk.MustQuery("show create table tx").Check(testkit.Rows( + "tx CREATE TABLE `tx` (\n" + + " `a` text DEFAULT NULL,\n" + + " FULLTEXT INDEX `a`(`a`) WITH PARSER STANDARD\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin", + )) + tk.MustExec("drop table tx") + + tk.MustExec("create table tx (a TEXT, FULLTEXT (a) WITH PARSER multilingual)") + tk.MustQuery("show create table tx").Check(testkit.Rows( + "tx CREATE 
TABLE `tx` (\n" + + " `a` text DEFAULT NULL,\n" + + " FULLTEXT INDEX `a`(`a`) WITH PARSER MULTILINGUAL\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin", + )) + tk.MustExec("drop table tx") + + tk.MustContainErrMsg("create table tx (a TEXT, FULLTEXT (a) WITH PARSER abc)", "Unsupported parser 'abc'") +} + +func TestFTSSyntax(t *testing.T) { + store := testkit.CreateMockStoreWithSchemaLease(t, 1*time.Second, mockstore.WithMockTiFlash(2)) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tiflash := infosync.NewMockTiFlash() + infosync.SetMockTiFlash(tiflash) + defer func() { + tiflash.Lock() + tiflash.StatusServer.Close() + tiflash.Unlock() + }() + + failpoint.Enable("github.com/pingcap/tidb/pkg/ddl/MockCheckColumnarIndexProcess", `return(1)`) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/ddl/MockCheckColumnarIndexProcess")) + }() + + tk.MustExec("create table t(title TEXT, body TEXT, FULLTEXT INDEX(title))") + tbl, _ := domain.GetDomain(tk.Session()).InfoSchema().TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t")) + tbl.Meta().TiFlashReplica = &model.TiFlashReplicaInfo{ + Count: 1, + Available: true, + } + + tk.MustQuery("select * from t where fts_match_word('hello', title)") + tk.MustQuery("select * from t where fts_match_word('hello', title) AND body = ''") + // tk.MustContainErrMsg("select * from t where (fts_match_word('hello', title)) > 0", "Currently 'FTS_MATCH_WORD()' must be used alone") + // tk.MustContainErrMsg("select (fts_match_word('hello', title)) AS score from t where fts_match_word('hello', title)", "Currently 'FTS_MATCH_WORD()' cannot be used in SELECT fields") + tk.MustContainErrMsg("select * from t where match() against ('hello')", `You have an error in your SQL syntax`) + // Test MATCH...AGAINST with alternative plans - LIKE fallback competes on cost + tk.MustExec("set @@tidb_opt_enable_alternative_logical_plans=ON") + tk.MustQuery("select * from t 
where match(title) against ('hello' in boolean mode)") + tk.MustExec("set @@tidb_opt_enable_alternative_logical_plans=OFF") + tk.MustContainErrMsg("select * from t where fts_match_word(title, body)", `match against a non-constant string`) + tk.MustContainErrMsg("select * from t where fts_match_word(45.67, body)", `match against a non-constant string`) + tk.MustContainErrMsg("select * from t where fts_match_word('hello', title, body)", `Incorrect parameter count in the call to native function`) +} + +func TestFTSIndexSyntax(t *testing.T) { + store := testkit.CreateMockStoreWithSchemaLease(t, 1*time.Second, mockstore.WithMockTiFlash(2)) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tiflash := infosync.NewMockTiFlash() + infosync.SetMockTiFlash(tiflash) + defer func() { + tiflash.Lock() + tiflash.StatusServer.Close() + tiflash.Unlock() + }() + + failpoint.Enable("github.com/pingcap/tidb/pkg/ddl/MockCheckColumnarIndexProcess", `return(1)`) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/ddl/MockCheckColumnarIndexProcess")) + }() + + tk.MustContainErrMsg("create table t(title TEXT, body TEXT, FULLTEXT KEY (`title`, `body`))", `FULLTEXT index must specify one column name`) + tk.MustContainErrMsg("create table t(title TEXT, body TEXT, FULLTEXT KEY ((`title`)))", `FULLTEXT index must specify one column name`) + tk.MustContainErrMsg("create table t(title TEXT, body TEXT, FULLTEXT KEY (title(5)))", `FULLTEXT index does not support prefix length`) + tk.MustContainErrMsg("create table t(title TEXT, body TEXT, FULLTEXT KEY (title DESC))", `FULLTEXT index does not support DESC order`) + tk.MustContainErrMsg("create table t(title TEXT, body TEXT, c INT, FULLTEXT KEY (c))", `only support string type`) + tk.MustContainErrMsg("create table t1(title TEXT, body TEXT, FULLTEXT KEY (title) WITH PARSER ngramx)", `Unsupported parser`) + + tk.MustExec("create table t1(title TEXT, body TEXT, FULLTEXT KEY (title))") + 
tk.MustQuery("show create table t1").Check(testkit.Rows("t1 CREATE TABLE `t1` (\n `title` text DEFAULT NULL,\n `body` text DEFAULT NULL,\n FULLTEXT INDEX `title`(`title`) WITH PARSER STANDARD\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin")) + tk.MustExec("create table t2(title TEXT, body TEXT, FULLTEXT (title))") + tk.MustQuery("show create table t2").Check(testkit.Rows("t2 CREATE TABLE `t2` (\n `title` text DEFAULT NULL,\n `body` text DEFAULT NULL,\n FULLTEXT INDEX `title`(`title`) WITH PARSER STANDARD\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin")) + tk.MustExec("create table t3(title TEXT, body TEXT, FULLTEXT KEY `idx` (title))") + tk.MustQuery("show create table t3").Check(testkit.Rows("t3 CREATE TABLE `t3` (\n `title` text DEFAULT NULL,\n `body` text DEFAULT NULL,\n FULLTEXT INDEX `idx`(`title`) WITH PARSER STANDARD\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin")) + tk.MustExec("create table t4(title TEXT, body TEXT, FULLTEXT KEY `idx` (`title`))") + tk.MustQuery("show create table t4").Check(testkit.Rows("t4 CREATE TABLE `t4` (\n `title` text DEFAULT NULL,\n `body` text DEFAULT NULL,\n FULLTEXT INDEX `idx`(`title`) WITH PARSER STANDARD\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin")) + tk.MustExec("create table t5(title TEXT, body TEXT, FULLTEXT KEY `idx` (title ASC))") + tk.MustQuery("show create table t5").Check(testkit.Rows("t5 CREATE TABLE `t5` (\n `title` text DEFAULT NULL,\n `body` text DEFAULT NULL,\n FULLTEXT INDEX `idx`(`title`) WITH PARSER STANDARD\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin")) + tk.MustExec("create table t6(title TEXT, body TEXT, FULLTEXT KEY `idx` (title ASC) WITH PARSER standard)") + tk.MustQuery("show create table t6").Check(testkit.Rows("t6 CREATE TABLE `t6` (\n `title` text DEFAULT NULL,\n `body` text DEFAULT NULL,\n FULLTEXT INDEX `idx`(`title`) WITH PARSER STANDARD\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin")) + + 
tk.MustExec("drop table t1, t2, t3, t4, t5, t6") + tk.MustExec("create table t1(title TEXT, body TEXT)") + tk.MustContainErrMsg("alter table t1 add FULLTEXT INDEX (body)", "columnar replica must exist to create") + tk.MustExec("alter table t1 set tiflash replica 1") + tk.MustExec("alter table t1 add FULLTEXT INDEX (body)") + tk.MustQuery("show create table t1").Check(testkit.Rows("t1 CREATE TABLE `t1` (\n `title` text DEFAULT NULL,\n `body` text DEFAULT NULL,\n FULLTEXT INDEX `body`(`body`) WITH PARSER STANDARD\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin")) + tk.MustExec("alter table t1 drop index body") +} + +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) func TestVectorLong(t *testing.T) { store := testkit.CreateMockStoreWithSchemaLease(t, 1*time.Second, mockstore.WithMockTiFlash(2)) diff --git a/pkg/parser/ast/functions.go b/pkg/parser/ast/functions.go index 68e736b20ced9..53ca4ad99113e 100644 --- a/pkg/parser/ast/functions.go +++ b/pkg/parser/ast/functions.go @@ -366,6 +366,13 @@ const ( VecFromText = "vec_from_text" VecAsText = "vec_as_text" +<<<<<<< HEAD +======= + // FTS functions (tidb extension) + FTSMatchWord = "fts_match_word" + FTSMysqlMatchAgainst = "match_against" + +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) // TiDB internal function. 
TiDBDecodeKey = "tidb_decode_key" TiDBMVCCInfo = "tidb_mvcc_info" diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go index 880eb94ab200c..0015d912d9239 100644 --- a/pkg/planner/cardinality/selectivity.go +++ b/pkg/planner/cardinality/selectivity.go @@ -286,6 +286,48 @@ func Selectivity( case ast.Like, ast.Ilike, ast.Regexp, ast.RegexpLike: notCoveredStrMatch[i] = x continue + case ast.FTSMysqlMatchAgainst: + // FTSMysqlMatchAgainst is opaque to the stats engine — its + // evalReal errors when called outside TiFlash, so TopN-based + // estimation can't run on it directly and the generic fallback + // would use SelectivityFactor (0.8) regardless of column stats. + // Substitute the equivalent ILIKE-based expression so the cost + // of round 1's native plan reflects the column's histogram / + // TopN rather than the flat default — this affects join order, + // index selection, etc., even though round 1's plan is the + // only candidate when every predicate MATCH is native-viable + // (the fts-like-fallback round only fires when round 1 is + // discarded). + // + // The substitution only fires for single-column MATCH(...); + // GetSelectivityByFilter declines multi-column expressions, so a + // multi-column substitute would just fall through to the same + // str-match default that the un-substituted FTS expression already + // receives. BuildFTSToILikeExpressionFromBuiltin returns an error + // for the multi-column case to keep that path explicit here. + if substitute, err := expression.BuildFTSToILikeExpressionFromBuiltin(ctx.GetExprCtx(), x); err == nil { + switch sub := substitute.(type) { + case *expression.ScalarFunction: + notCoveredStrMatch[i] = sub + continue + case *expression.Constant: + // AGAINST(NULL) produces Constant(NULL) (preserves SQL + // three-valued logic — matches the planner-side + // matchAgainstToLike NULL fast-path); empty-string + // search produces Constant(0). 
Route either to the + // constants bucket so the stats engine recognizes the + // substitute as constant-false (the IsNull / ToBool + // pass at line ~309 zeroes selectivity for both + // shapes) instead of applying the str-match default + // (0.1). + notCoveredConstants[i] = sub + continue + } + } + // Fall through if substitution failed; the FTS expression will + // use the str-match default selectivity (0.1) instead of 0.8. + notCoveredStrMatch[i] = x + continue case ast.UnaryNot: inner := expression.GetExprInsideIsTruth(x.GetArgs()[0]) innerSF, ok := inner.(*expression.ScalarFunction) diff --git a/pkg/planner/core/BUILD.bazel b/pkg/planner/core/BUILD.bazel index 6dbe8bd8efca3..5ef12aefa78d7 100644 --- a/pkg/planner/core/BUILD.bazel +++ b/pkg/planner/core/BUILD.bazel @@ -15,8 +15,12 @@ go_library( "expression_rewriter.go", "find_best_task.go", "flat_plan.go", +<<<<<<< HEAD "foreign_key.go", "fragment.go", +======= + "fulltext_to_like.go", +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) "hint_utils.go", "index_join_path.go", "indexmerge_path.go", @@ -224,7 +228,11 @@ go_test( "exhaust_physical_plans_test.go", "expression_test.go", "find_best_task_test.go", +<<<<<<< HEAD "fragment_test.go", +======= + "fulltext_to_like_test.go", +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) "hint_test.go", "indexmerge_intersection_test.go", "indexmerge_path_test.go", diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 1f0e4ba23604a..9825005fba9dd 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -677,6 +677,96 @@ func (er *expressionRewriter) Enter(inNode ast.Node) (ast.Node, bool) { return inNode, false } +<<<<<<< HEAD +======= +// canTreatInSubqueryAsExistsForFilter reports whether the IN subquery is in a WHERE/HAVING boolean chain +// composed only of 
AND/OR and parentheses, so it can be treated like EXISTS for filter context. +func (er *expressionRewriter) canTreatInSubqueryAsExistsForFilter(planCtx *exprRewriterPlanCtx) bool { + if planCtx == nil { + return false + } + if planCtx.curClause != whereClause && planCtx.curClause != havingClause { + return false + } + if len(er.astNodeStack) == 0 { + return false + } + for i := len(er.astNodeStack) - 2; i >= 0; i-- { + switch n := er.astNodeStack[i].(type) { + case *ast.ParenthesesExpr: + case *ast.BinaryOperationExpr: + if n.Op != opcode.LogicAnd && n.Op != opcode.LogicOr { + return false + } + default: + return false + } + } + return true +} + +// inDirectMatchBooleanContext reports whether the MATCH...AGAINST currently +// being rewritten sits in a position where its boolean (0/1) result is +// directly consumed as a predicate — i.e. every ancestor up to the WHERE / +// HAVING / JOIN ON root is one of: parentheses, AND, OR, or NOT. +// +// Any other ancestor (comparison `= 0` / `> 0.5`, `IS NULL`, CASE, arithmetic, +// XOR, scalar function, etc.) means MATCH is being used as a scalar relevance +// score, where the LIKE rewrite's 0/1 output would diverge from the native +// float score and silently produce wrong rows. In those positions the +// rewriter must fall through to the native FTSMysqlMatchAgainst builtin, +// which preserves the relevance-score semantics (and errors at execution if +// no FTS index is available — the same behavior the user would see with +// alternative logical plans disabled). 
+func (er *expressionRewriter) inDirectMatchBooleanContext() bool { + if er.planCtx == nil { + return false + } + switch er.planCtx.builder.curClause { + case whereClause, havingClause, onClause: + default: + return false + } + if len(er.astNodeStack) == 0 { + return false + } + for i := len(er.astNodeStack) - 2; i >= 0; i-- { + switch n := er.astNodeStack[i].(type) { + case *ast.ParenthesesExpr: + case *ast.BinaryOperationExpr: + if n.Op != opcode.LogicAnd && n.Op != opcode.LogicOr { + return false + } + case *ast.UnaryOperationExpr: + if n.Op != opcode.Not && n.Op != opcode.Not2 { + return false + } + default: + return false + } + } + return true +} + +// matchHasLikeFallbackRescue reports whether matchAgainstToBuiltin is being +// invoked in a position where the alt-rounds driver will discard the produced +// plan and rebuild via the fts-like-fallback round. It is used by the modifier +// guard in matchAgainstToBuiltin to allow native emission of a non-default +// modifier when round 1's plan is destined for discard anyway. The rescue +// conditions mirror the ones in matchAgainstToExpression that trigger +// MarkNonViableFTSMatch — alternative logical plans enabled AND a direct +// boolean predicate context. 
+func (er *expressionRewriter) matchHasLikeFallbackRescue() bool { + if er.planCtx == nil || er.planCtx.builder == nil || er.planCtx.builder.ctx == nil { + return false + } + if !er.planCtx.builder.ctx.GetSessionVars().EnableAlternativeLogicalPlans { + return false + } + return er.inDirectMatchBooleanContext() +} + +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) func (er *expressionRewriter) buildSemiApplyFromEqualSubq(np base.LogicalPlan, planCtx *exprRewriterPlanCtx, l, r expression.Expression, not, markNoDecorrelate bool) { intest.AssertNotNil(planCtx) if er.asScalar || not { @@ -1659,6 +1749,8 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok } er.ctxStack[len(er.ctxStack)-1].SetCoercibility(expression.CoercibilityExplicit) er.ctxStack[len(er.ctxStack)-1].SetCharsetAndCollation(arg.GetType(er.sctx.GetEvalCtx()).GetCharset(), arg.GetType(er.sctx.GetEvalCtx()).GetCollate()) + case *ast.MatchAgainst: + er.matchAgainstToExpression(v) default: er.err = errors.Errorf("UnknownType: %T", v) return retNode, false @@ -2174,6 +2266,294 @@ func (er *expressionRewriter) patternLikeOrIlikeToExpression(v *ast.PatternLikeO er.ctxStackAppend(function, types.EmptyName) } +func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { + // Both the column expressions and Against expression have been visited + // and pushed onto the ctxStack. The stack layout is: + // [..., col1, col2, ..., colN, against] + numCols := len(v.ColumnNames) + stackLen := len(er.ctxStack) + if stackLen < numCols+1 { + er.err = errors.Errorf("Unexpected stack length for MatchAgainst: %d", stackLen) + return + } + + // Default behavior (Alt-disabled or Alt-enabled round 1) is to emit the + // native FTSMysqlMatchAgainst builtin. 
The alternative-rounds driver flips + // AlternativeLogicalPlanFTSLikeFallback to true and re-runs the build + // only when round 1 reported a direct-boolean-context MATCH that the + // native builtin cannot serve (no FTS index on a TiFlash replica / + // modifier not pushdown-supported). In that second pass the rewriter + // emits ILIKE for direct-boolean-context MATCH only — scoring contexts + // (SELECT field list / ORDER BY) and scalar predicate positions + // (IS NULL, comparisons, CASE, arithmetic) need the float relevance + // score, so they keep using the native builtin and will error at + // execution if no FTS index exists there. + // + // "Direct boolean context" requires that every ancestor up to the + // WHERE/HAVING/ON root is AND/OR/NOT/parens — see inDirectMatchBooleanContext. + // Limiting the LIKE rewrite to that subset preserves the 0/1-vs-float + // distinction: in scalar positions, `MATCH(...) IS NULL`, `MATCH(...) > 0.5`, + // etc. would silently produce wrong rows if the LIKE rewrite's integer + // result were substituted for the native float score. + // + // Round 1 also has to record viability before committing to native: if + // any boolean-context MATCH is non-viable, the resulting plan would + // fail at execution. The rewriter records that on the planBuilder so the + // round driver can invalidate the plan and trigger the fallback round. + // Round 1 additionally records that a direct-boolean-context MATCH was + // seen so the driver runs the LIKE round for cost competition even when + // round 1's native plan is executable. + useLikeFallback := false + if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { + sessVars := er.planCtx.builder.ctx.GetSessionVars() + if er.inDirectMatchBooleanContext() { + if sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback { + // fts-like-fallback round: boolean-context MATCH rewrites to ILIKE. 
+ useLikeFallback = true + } else if sessVars.EnableAlternativeLogicalPlans { + // Round 1 (native). Mark the build so the driver runs the LIKE + // round and cost-compares its plan against round 1's. If this + // MATCH cannot run natively, also mark the build as non-viable + // so the driver discards round 1's plan; the rewrite continues + // with the native builtin to keep round 1 internally consistent. + er.planCtx.builder.MarkPredicateMatch() + if !er.ftsNativeViable(v.Modifier, numCols, stackLen) { + er.planCtx.builder.MarkNonViableFTSMatch() + } + } + } + } + + if useLikeFallback { + er.matchAgainstToLike(v, numCols, stackLen) + } else { + er.matchAgainstToBuiltin(v, numCols, stackLen) + } +} + +// ftsNativeViable reports whether the MATCH(...) currently being rewritten +// can be served on TiFlash by the native FTSMysqlMatchAgainst builtin. It +// walks the resolved column FieldNames sitting on ctxNameStk (stack layout is +// [..., col1, ..., colN, against]) and requires for each column: +// - the originating table has an available TiFlash replica; +// - the column is covered by a public FULLTEXT index on that table. +// +// In addition, the modifier must be the default natural-language mode. Boolean +// mode and WITH QUERY EXPANSION are not encoded in the tipb pushdown today +// (only ScalarFuncSig_FTSMatchExpression is emitted regardless of modifier), +// so a native plan that wins on cost would execute on TiFlash with the modifier +// silently dropped. Until the modifier is carried in the pushdown protocol, we +// treat those modifiers as non-viable for native pushdown. 
+func (er *expressionRewriter) ftsNativeViable(modifier ast.FulltextSearchModifier, numCols, stackLen int) bool { + if numCols <= 0 { + return false + } + if !ftsModifierAllowsNativePushdown(modifier) { + return false + } + builder := er.planCtx.builder + sessVars := builder.ctx.GetSessionVars() + nameStart := stackLen - numCols - 1 + for i := range numCols { + name := er.ctxNameStk[nameStart+i] + if name == nil { + return false + } + tblName := name.OrigTblName + if tblName.L == "" { + tblName = name.TblName + } + if tblName.L == "" { + return false + } + dbName := name.DBName + if dbName.L == "" { + dbName = ast.NewCIStr(sessVars.CurrentDB) + } + tblInfo, err := builder.is.TableInfoByName(dbName, tblName) + if err != nil { + return false + } + if tblInfo.TiFlashReplica == nil || !tblInfo.TiFlashReplica.Available || tblInfo.TiFlashReplica.Count == 0 { + return false + } + colName := name.OrigColName + if colName.L == "" { + colName = name.ColName + } + if !tableHasPublicFTSIndexOnColumn(tblInfo, colName.L) { + return false + } + } + return true +} + +// ftsModifierAllowsNativePushdown reports whether an FTS modifier can be +// safely served by the native FTSMysqlMatchAgainst builtin pushed to TiFlash. +// Today the tipb pushdown encodes only ScalarFuncSig_FTSMatchExpression and +// drops the modifier, so any non-default modifier would be executed by TiFlash +// as natural-language mode, silently producing wrong results. Only the default +// (natural-language, no query expansion) modifier is currently safe. +func ftsModifierAllowsNativePushdown(modifier ast.FulltextSearchModifier) bool { + return !modifier.IsBooleanMode() && !modifier.WithQueryExpansion() +} + +// tableHasPublicFTSIndexOnColumn reports whether tblInfo has a public FULLTEXT +// index covering the given column. TiDB's FULLTEXT index is single-column, so +// each column in MATCH(...) needs its own FTS index for the native path to be +// viable. 
+func tableHasPublicFTSIndexOnColumn(tblInfo *model.TableInfo, columnNameL string) bool { + for _, idx := range tblInfo.Indices { + if idx.FullTextInfo == nil || !idx.IsPublic() { + continue + } + if idx.FindColumnByName(columnNameL) != nil { + return true + } + } + return false +} + +// matchAgainstToBuiltin converts MATCH...AGAINST to the FTSMysqlMatchAgainst +// builtin scalar function which can be pushed down to TiFlash for execution +// against a fulltext index. +func (er *expressionRewriter) matchAgainstToBuiltin(v *ast.MatchAgainst, numCols, stackLen int) { + // Reject non-default modifiers when native is the final plan. The tipb + // pushdown protocol (see expression/distsql_builtin.go for the explicit + // note) does not serialize the FTS modifier, so TiFlash would silently + // execute Boolean-mode / query-expansion searches as natural-language + // mode. Until the modifier rides through pushdown, refuse to emit + // native here unless the alt-rounds driver is expected to discard this + // emission and rebuild via the fts-like-fallback round (which handles + // Boolean mode correctly via ILIKE; query expansion still errors there + // with a specific message). + if !ftsModifierAllowsNativePushdown(v.Modifier) && !er.matchHasLikeFallbackRescue() { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)") + return + } + + against := er.ctxStack[stackLen-1] + cols := er.ctxStack[stackLen-numCols-1 : stackLen-1] + + args := make([]expression.Expression, 0, 1+numCols) + args = append(args, against) + args = append(args, cols...) + + er.ctxStackPop(numCols + 1) + fn, err := er.newFunction(ast.FTSMysqlMatchAgainst, &v.Type, args...) 
+ if err != nil { + er.err = err + return + } + sf, ok := fn.(*expression.ScalarFunction) + if !ok { + er.err = errors.Errorf("unexpected expression type for %s: %T", ast.FTSMysqlMatchAgainst, fn) + return + } + if err := expression.SetFTSMysqlMatchAgainstModifier(sf, v.Modifier); err != nil { + er.err = err + return + } + er.ctxStackAppend(fn, types.EmptyName) +} + +// matchAgainstToLike converts MATCH...AGAINST to LIKE predicates as a +// fallback when the native FTS pushdown path is not viable. +func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, stackLen int) { + againstExpr := er.ctxStack[stackLen-1] + + constExpr, ok := againstExpr.(*expression.Constant) + if !ok { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") + return + } + + // The LIKE fallback bakes the search value into the produced plan — either + // as ILIKE pattern constants (non-NULL case) or as a Constant(NULL) + // short-circuit. A cached plan would reuse the first execution's baked + // value for later executions, producing wrong results whenever the AGAINST + // argument is mutable: a `?` parameter marker, a user variable, or another + // deferred expression. In particular, a NULL first bind would bake a + // Constant(NULL) plan and reuse it for a later non-NULL bind. Mark the + // plan non-cacheable here, before the NULL fast-path and before Eval, so + // the skip applies uniformly across all branches below. + if expression.MaybeOverOptimized4PlanCache(er.sctx, constExpr) { + er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes a mutable search string into plan constants") + } + + // Reject non-string matched columns before any value-based branch so the + // column-type error always wins. 
In current architecture round 1's + // matchAgainstToBuiltin → getFunction (builtin_fts.go) already rejects + // non-string columns before round 2 (this function) can run, but keep + // the check here too as defense in depth: the LIKE fallback's own NULL + // fast-path and strict-subset validator below should never accept a + // non-string column, regardless of any future code path that might + // reach this function around round 1. + columns := make([]expression.Expression, numCols) + for i := range numCols { + col := er.ctxStack[stackLen-numCols-1+i] + if col.GetType(er.sctx.GetEvalCtx()).EvalType() != types.ETString { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") + return + } + columns[i] = col + } + + searchText, err := constExpr.Eval(er.sctx.GetEvalCtx(), chunk.Row{}) + if err != nil { + er.err = err + return + } + + if searchText.IsNull() { + // NULL search yields NULL in MySQL FTS semantics + // (builtin_fts.go evalReal returns isNull=true for NULL args), so we + // emit Constant(NULL) rather than Constant(0). This preserves + // three-valued logic under NOT — NOT NULL = NULL filters the row — + // and under IS NULL / IS NOT NULL. A literal Constant(0) would make + // NOT(MATCH...) admit every row when the search is NULL, diverging + // from native semantics. + er.ctxStackPop(numCols + 1) + er.ctxStackAppend(&expression.Constant{ + Value: types.Datum{}, + RetType: types.NewFieldType(mysql.TypeTiny), + }, types.EmptyName) + return + } + + if searchText.Kind() != types.KindString { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-string search expression") + return + } + + // The LIKE fallback only translates a strict subset of MySQL FTS search + // strings (alphanumeric words, optionally prefixed with + or - in boolean + // mode). 
Anything outside that subset would tokenize differently in MySQL + // FTS than a substring LIKE match, so refuse it here. If the same MATCH + // is independently native-viable (FTS index + supported modifier), + // delegate to the native builtin so TiFlash serves it correctly; otherwise + // surface the error to the user. + if err := expression.ValidateFTSSearchStringForLikeFallback(searchText.GetString(), v.Modifier); err != nil { + if er.ftsNativeViable(v.Modifier, numCols, stackLen) { + er.matchAgainstToBuiltin(v, numCols, stackLen) + return + } + er.err = err + return + } + + er.ctxStackPop(numCols + 1) + + result, err := er.convertMatchAgainstToLike(columns, searchText.GetString(), v.Modifier) + if err != nil { + er.err = err + return + } + + er.ctxStackAppend(result, types.EmptyName) +} + func (er *expressionRewriter) regexpToScalarFunc(v *ast.PatternRegexpExpr) { l := len(er.ctxStack) er.err = expression.CheckArgsNotMultiColumnRow(er.ctxStack[l-2:]...) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go new file mode 100644 index 0000000000000..72f0cb04f519e --- /dev/null +++ b/pkg/planner/core/fulltext_to_like.go @@ -0,0 +1,76 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package core + +import ( + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/parser/ast" +) + +// convertMatchAgainstToLike converts a MATCH...AGAINST expression to ILIKE +// predicates. It is a thin wrapper around expression.BuildFTSToILikeExpression; +// the conversion logic lives in pkg/expression so the same translation can be +// shared with cardinality-based selectivity estimation (which substitutes the +// equivalent ILIKE form for the opaque FTSMysqlMatchAgainst builtin). +// +// This is a fallback rewrite since TiDB does not natively support full-text +// search outside the TiFlash FTS path. The planner only invokes it in +// direct-boolean predicate positions — every ancestor up to the +// WHERE / HAVING / JOIN ON root must be AND / OR / NOT / parens +// (see inDirectMatchBooleanContext in expression_rewriter.go). Scoring +// contexts (SELECT field list, ORDER BY) and scalar predicate positions +// (IS NULL, comparisons, CASE, arithmetic) keep the native +// FTSMysqlMatchAgainst builtin so the result is a float relevance score +// rather than 0/1, even though the native path then requires TiFlash at +// execution time. The semantic differences below therefore apply to +// direct-boolean predicate use only: +// +// 1. No relevance scoring — the synthesized ILIKE predicate produces a 0/1 +// boolean filter result, which is the only thing a direct-boolean +// predicate position consumes. Relevance-score positions (ORDER BY, +// scalar SELECT, MATCH ... = 0, MATCH ... > 0.5, etc.) are intentionally +// NOT routed through this fallback; substituting 0/1 there would +// silently corrupt the sort or the comparison. +// 2. No stop word filtering — searches for all words regardless of length +// or commonness. +// 3. No word length limits — MySQL ignores words shorter than +// ft_min_word_len (default 4); the ILIKE rewrite does not. +// 4. No word boundaries — LIKE %term% matches substrings anywhere, not just +// complete words. 
Example: "cat" matches "concatenate", "category", +// "application"; MySQL FTS only matches "cat" as a standalone word. +// Enforcing word boundaries would require REGEXP, which we avoid. +// 5. Performance — LIKE predicates cannot use full-text indexes (much +// slower on large datasets). +// +// Search-string subset accepted by the rewrite (enforced upstream by +// expression.ValidateFTSSearchStringForLikeFallback): +// +// - Natural-language mode: whitespace-separated alphanumeric words only. +// - Boolean mode: each token is `word`, `+word` (required), or `-word` +// (excluded), where `word` is alphanumeric (ASCII or non-ASCII UTF-8). +// +// Anything outside that subset — phrases, * prefix, > < ~ relevance +// modifiers, () grouping, mid-word punctuation like `xx-yy` — is rejected +// at plan time with ErrNotSupportedYet because MySQL FTS tokenizes those +// constructs in ways a substring LIKE cannot reproduce. WITH QUERY +// EXPANSION is likewise rejected (no LIKE approximation exists for the +// second-pass tokenization). +func (er *expressionRewriter) convertMatchAgainstToLike( + columns []expression.Expression, + searchText string, + modifier ast.FulltextSearchModifier, +) (expression.Expression, error) { + return expression.BuildFTSToILikeExpression(er.sctx, columns, searchText, modifier) +} diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go new file mode 100644 index 0000000000000..159eccf7e9cda --- /dev/null +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -0,0 +1,134 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/stretchr/testify/require" +) + +func TestFTSModifierAllowsNativePushdown(t *testing.T) { + tests := []struct { + name string + modifier ast.FulltextSearchModifier + expected bool + }{ + { + name: "natural language mode (default)", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode), + expected: true, + }, + { + name: "boolean mode", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode), + expected: false, + }, + { + name: "natural language mode with query expansion", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode | ast.FulltextSearchModifierWithQueryExpansion), + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.expected, ftsModifierAllowsNativePushdown(tt.modifier)) + }) + } +} + +func TestTableHasPublicFTSIndexOnColumn(t *testing.T) { + ftsIdx := func(name, column string, state model.SchemaState) *model.IndexInfo { + return &model.IndexInfo{ + Name: ast.NewCIStr(name), + State: state, + Tp: ast.IndexTypeInvalid, + Columns: []*model.IndexColumn{{Name: ast.NewCIStr(column)}}, + FullTextInfo: &model.FullTextIndexInfo{ParserType: model.FullTextParserTypeStandardV1}, + } + } + plainIdx := func(name, column string) *model.IndexInfo { + return &model.IndexInfo{ + Name: ast.NewCIStr(name), + State: model.StatePublic, + Tp: ast.IndexTypeBtree, + Columns: 
[]*model.IndexColumn{{Name: ast.NewCIStr(column)}}, + } + } + + tests := []struct { + name string + indices []*model.IndexInfo + column string + expected bool + }{ + { + name: "no indices", + indices: nil, + column: "title", + expected: false, + }, + { + name: "only non-FTS index on the column", + indices: []*model.IndexInfo{plainIdx("idx_title", "title")}, + column: "title", + expected: false, + }, + { + name: "public FTS index on the column", + indices: []*model.IndexInfo{ftsIdx("ft_title", "title", model.StatePublic)}, + column: "title", + expected: true, + }, + { + name: "non-public FTS index on the column", + indices: []*model.IndexInfo{ftsIdx("ft_title", "title", model.StateWriteReorganization)}, + column: "title", + expected: false, + }, + { + name: "FTS index on a different column", + indices: []*model.IndexInfo{ftsIdx("ft_body", "body", model.StatePublic)}, + column: "title", + expected: false, + }, + { + name: "FTS index covers the column among many indices", + indices: []*model.IndexInfo{ + plainIdx("idx_id", "id"), + ftsIdx("ft_body", "body", model.StatePublic), + ftsIdx("ft_title", "title", model.StatePublic), + }, + column: "title", + expected: true, + }, + { + name: "case-insensitive column match", + indices: []*model.IndexInfo{ftsIdx("ft_title", "Title", model.StatePublic)}, + column: "title", + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tblInfo := &model.TableInfo{Indices: tt.indices} + require.Equal(t, tt.expected, tableHasPublicFTSIndexOnColumn(tblInfo, tt.column)) + }) + } +} diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index 09d433dbb9967..ad61bad207de9 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -319,10 +319,56 @@ type PlanBuilder struct { // resolveCtx is set when calling Build, it's only effective in the current Build call. 
resolveCtx *resolve.Context +<<<<<<< HEAD // SavedViews is a stack that saves all views when traversing the AST. We depend on it to: // 1. know whether the AST node is under a view // 2. report precise error in appendColNamesToVisitInfo. SavedViews []*ast.TableName +======= + // nonViableFTSMatch is set during build when the expression rewriter + // encounters a predicate-context MATCH...AGAINST whose native form + // (FTSMysqlMatchAgainst) cannot be executed — the matched columns lack a + // public FULLTEXT index on a TiFlash-backed table, or the modifier is not + // supported by pushdown. The flag is read by the alternative-rounds driver + // after the round to invalidate the round's plan and trigger the + // fts-like-fallback round (see optimize.go). + nonViableFTSMatch bool + + // predicateMatchSeen is set during build when the expression rewriter + // encounters a direct-boolean-context MATCH...AGAINST (one whose 0/1 boolean + // result is consumed directly as a predicate). The alternative-rounds driver + // uses this to enable the fts-like-fallback round even when round 1's + // native plan is executable, so the LIKE-based plan can compete on cost. + predicateMatchSeen bool +} + +// HasNonViableFTSMatch reports whether the most recent build round saw a +// predicate-context MATCH...AGAINST that could not be served by the native +// FTSMysqlMatchAgainst builtin. The caller (optimize.go) uses this to +// invalidate the round's plan and trigger the fts-like-fallback round. +func (b *PlanBuilder) HasNonViableFTSMatch() bool { + return b.nonViableFTSMatch +} + +// MarkNonViableFTSMatch records that a predicate-context MATCH...AGAINST in +// the current build cannot be served natively. See HasNonViableFTSMatch. +func (b *PlanBuilder) MarkNonViableFTSMatch() { + b.nonViableFTSMatch = true +} + +// HasPredicateMatch reports whether the most recent build round saw a +// direct-boolean-context MATCH...AGAINST. 
The caller (optimize.go) uses this +// to decide whether to run the fts-like-fallback round for cost competition, +// independent of whether round 1's native plan is executable. +func (b *PlanBuilder) HasPredicateMatch() bool { + return b.predicateMatchSeen +} + +// MarkPredicateMatch records that the current build encountered a +// direct-boolean-context MATCH...AGAINST. See HasPredicateMatch. +func (b *PlanBuilder) MarkPredicateMatch() { + b.predicateMatchSeen = true +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) } type handleColHelper struct { diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index 8f19e77467d85..8ac82e17a1b38 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -466,9 +466,241 @@ var planBuilderPool = sync.Pool{ }, } +<<<<<<< HEAD // optimizeCnt is a global variable only used for test. var optimizeCnt int +======= +type logicalPlanBuildCtx struct { + stmtCtxState stmtctx.LogicalPlanBuildState + plannerSelectBlockAsName *[]ast.HintTable + mapScalarSubQ []any + mapHashCode2UniqueID map[string]int + rewritePhaseInfo variable.RewritePhaseInfo +} + +func saveLogicalPlanBuildCtx(sessVars *variable.SessionVars) logicalPlanBuildCtx { + return logicalPlanBuildCtx{ + stmtCtxState: sessVars.StmtCtx.SaveLogicalPlanBuildState(), + plannerSelectBlockAsName: sessVars.PlannerSelectBlockAsName.Load(), + mapScalarSubQ: sessVars.MapScalarSubQ, + mapHashCode2UniqueID: sessVars.MapHashCode2UniqueID4ExtendedCol, + rewritePhaseInfo: sessVars.RewritePhaseInfo, + } +} + +func restoreLogicalPlanBuildCtx(sessVars *variable.SessionVars, logicalPlanCtx logicalPlanBuildCtx) { + sessVars.StmtCtx.RestoreLogicalPlanBuildState(logicalPlanCtx.stmtCtxState) + sessVars.PlannerSelectBlockAsName.Store(logicalPlanCtx.plannerSelectBlockAsName) + sessVars.MapScalarSubQ = logicalPlanCtx.mapScalarSubQ + sessVars.MapHashCode2UniqueID4ExtendedCol = logicalPlanCtx.mapHashCode2UniqueID + 
sessVars.RewritePhaseInfo = logicalPlanCtx.rewritePhaseInfo +} + +func buildAndOptimizeLogicalPlanRound( + ctx context.Context, + sctx planctx.PlanContext, + node *resolve.NodeW, + is infoschema.InfoSchema, + hintProcessor *hint.QBHintHandler, + checked *bool, + optimizeStarted *bool, + beginOpt *time.Time, + needRestoreLogicalPlanCtx bool, + bestPlan *base.PhysicalPlan, + bestNames *types.NameSlice, + bestCost *float64, + bestLogicalPlanCtx *logicalPlanBuildCtx, + optFlagAdjust func(uint64) uint64, +) (base.Plan, types.NameSlice, bool, error) { + builder := planBuilderPool.Get().(*core.PlanBuilder) + defer planBuilderPool.Put(builder.ResetForReuse()) + // TODO: when buildRound > 1, only emit unused view-hint warnings for the winner build. + defer builder.HandleUnusedViewHints() + + builder.Init(sctx, is, hintProcessor) + + // todo: you can customize each round's special builder (like semi join rewrite or not by signal) + p, err := buildLogicalPlan(ctx, sctx, node, builder) + if err != nil { + return nil, nil, false, err + } + names := p.OutputNames() + + if !*checked { + // Keep privilege and lock checks fail-fast. These depend on visitInfo + // produced by the logical build, but not on the later cost winner. + if pm := privilege.GetPrivilegeManager(sctx); pm != nil { + visitInfo := core.VisitInfo4PrivCheck(ctx, is, node.Node, builder.GetVisitInfo()) + if err := core.CheckPrivilege(sctx.GetSessionVars().ActiveRoles, pm, visitInfo); err != nil { + return nil, nil, false, err + } + } + + if err := core.CheckTableLock(sctx, is, builder.GetVisitInfo()); err != nil { + return nil, nil, false, err + } + + if err := core.CheckTableMode(node); err != nil { + return nil, nil, false, err + } + *checked = true + } + + // Handle the non-logical plan statement. 
+ logic, isLogicalPlan := p.(base.LogicalPlan) + if !isLogicalPlan { + return p, names, true, nil + } + + core.RecheckCTE(logic) + + // todo: also you can customize each round's special logical opt flag here (like decorrelate rule or not) + if !*optimizeStarted { + *optimizeStarted = true + *beginOpt = time.Now() + } + optFlag := builder.GetOptFlag() + if sctx.GetSessionVars().EnableAlternativeLogicalPlans && + optFlag&rule.FlagPushDownTopN > 0 && + optFlag&rule.FlagJoinReOrder > 0 { + sctx.GetSessionVars().StmtCtx.MarkAlternativeLogicalPlanOrderAwareJoinReorder() + } + if optFlagAdjust != nil { + optFlag = optFlagAdjust(optFlag) + } + finalPlan, cost, err := core.DoOptimize(ctx, sctx, optFlag, logic) + if err != nil { + return nil, nil, false, err + } + + // Record predicate-context MATCH for cost competition. The fts-like-fallback + // alternative round reads this signal to decide whether to build a competing + // ILIKE-based plan alongside round 1's native plan, so the cheaper of the + // two wins via the normal alt-rounds cost comparison. + if builder.HasPredicateMatch() { + sctx.GetSessionVars().StmtCtx.AlternativeLogicalPlanHasPredicateContextMatch = true + } + + // If this round saw a predicate-context MATCH that cannot be served by the + // native FTSMysqlMatchAgainst builtin, the produced plan would fail at + // execution. Discard it and arm AlternativeLogicalPlanFTSLikeFallback so + // any intervening rounds (correlate, etc.) re-rewrite with ILIKE too. The + // fts-like-fallback round below also forces this flag during setup; this + // outer assignment covers the non-viable case where the flag must stay + // true across all subsequent rounds, not just inside the LIKE round. 
+ if builder.HasNonViableFTSMatch() { + sctx.GetSessionVars().StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + return p, names, false, nil + } + + if *bestPlan == nil || cost < *bestCost { + *bestCost = cost + *bestPlan = finalPlan + *bestNames = names + if needRestoreLogicalPlanCtx { + *bestLogicalPlanCtx = saveLogicalPlanBuildCtx(sctx.GetSessionVars()) + } + } + return p, names, false, nil +} + +// optimizeCnt is a global variable only used for test. +var optimizeCnt int + +func shouldTryNonDecorrelationRound(sessVars *variable.SessionVars) bool { + return sessVars.EnableAlternativeLogicalPlans && + sessVars.StmtCtx.AlternativeLogicalPlanDecorrelatedApply && + !sessVars.StmtCtx.AlternativeLogicalPlanSameOrderIndexJoin +} + +func shouldTryOrderAwareReorderRound(sessVars *variable.SessionVars) bool { + return sessVars.EnableAlternativeLogicalPlans && + sessVars.StmtCtx.AlternativeLogicalPlanOrderAwareJoinReorder +} + +func shouldTryCorrelateRound(sessVars *variable.SessionVars) bool { + return sessVars.EnableAlternativeLogicalPlans && + sessVars.StmtCtx.AlternativeLogicalPlanPreferCorrelate +} + +// alternativeRound describes one alternative logical-plan round. +// adjustFlag adjusts the optimization flags for the round. +// enabled returns true when the round should be attempted. +// setup/cleanup optionally modify session state before/after plan building. +type alternativeRound struct { + name string + adjustFlag func(uint64) uint64 + enabled func(*variable.SessionVars) bool + setup func(*variable.SessionVars) + cleanup func(*variable.SessionVars) +} + +// savedEnableCorrelateSubquery holds the pre-round value of +// EnableCorrelateSubquery so setup/cleanup can share it without a closure +// wrapper. Safe because optimize is single-threaded per session. 
+var savedEnableCorrelateSubquery bool + +// savedFTSLikeFallback holds the pre-round value of +// AlternativeLogicalPlanFTSLikeFallback so the fts-like-fallback round's +// setup/cleanup can restore it after running with the flag forced on. Safe +// because optimize is single-threaded per session. +var savedFTSLikeFallback bool + +var alternativeRounds = [...]alternativeRound{ + { + name: "non-decorrelate", + adjustFlag: func(flag uint64) uint64 { return flag &^ rule.FlagDecorrelate }, + enabled: shouldTryNonDecorrelationRound, + }, + { + name: "order-aware-reorder", + adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagOrderAwareJoinReorder }, + enabled: shouldTryOrderAwareReorderRound, + }, + { + name: "correlate", + adjustFlag: func(flag uint64) uint64 { return flag | rule.FlagCorrelate }, + enabled: shouldTryCorrelateRound, + setup: func(sv *variable.SessionVars) { + savedEnableCorrelateSubquery = sv.EnableCorrelateSubquery + sv.EnableCorrelateSubquery = true + }, + cleanup: func(sv *variable.SessionVars) { + sv.EnableCorrelateSubquery = savedEnableCorrelateSubquery + }, + }, + { + // fts-like-fallback: rebuild the plan rewriting predicate-context + // MATCH...AGAINST to ILIKE so it can compete with round 1's native plan + // on cost (and serve as the only valid plan when native is non-viable). + // Round 1 always uses the native builtin (same as Alt-disabled). This + // round fires whenever round 1 saw a direct-boolean-context MATCH + // (HasPredicateContextMatch) — both plans then compete via the strict-`<` + // cost comparison in buildAndOptimizeLogicalPlanRound — or whenever + // round 1 saw a MATCH whose native form cannot execute + // (FTSLikeFallback, set by the round driver after discarding round 1). + // In the discard case, round 1's plan is unavailable and this round's + // plan wins by default. 
+ name: "fts-like-fallback", + enabled: func(sv *variable.SessionVars) bool { + if !sv.EnableAlternativeLogicalPlans { + return false + } + return sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback || + sv.StmtCtx.AlternativeLogicalPlanHasPredicateContextMatch + }, + setup: func(sv *variable.SessionVars) { + savedFTSLikeFallback = sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + }, + cleanup: func(sv *variable.SessionVars) { + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = savedFTSLikeFallback + }, + }, +} + +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW, is infoschema.InfoSchema) (base.Plan, types.NameSlice, float64, error) { failpoint.Inject("checkOptimizeCountOne", func(val failpoint.Value) { // only count the optimization for SQL with specified text @@ -484,9 +716,52 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW topsql.MockHighCPULoad(sctx.GetSessionVars().StmtCtx.OriginalSQL, sqlPrefixes, 10) }) sessVars := sctx.GetSessionVars() +<<<<<<< HEAD if sessVars.StmtCtx.EnableOptimizerDebugTrace { debugtrace.EnterContextCommon(sctx) defer debugtrace.LeaveContextCommon(sctx) +======= + var ( + beginOpt time.Time + optimizeStarted bool + ) + defer func() { + if optimizeStarted { + sessVars.DurationOptimizer.Total = time.Since(beginOpt) + } + }() + + // Build the logical plan from the raw AST. The hint processor only keeps + // AST-derived metadata; per-build state is allocated inside PlanBuilder. + hintProcessor := hint.NewQBHintHandler(sctx.GetSessionVars().StmtCtx) + node.Node.Accept(hintProcessor) + + // build multi logical plan from raw AST. 
+ var ( + needRestoreLogicalPlanCtx = sessVars.EnableAlternativeLogicalPlans + bestCost = math.MaxFloat64 + bestPlan base.PhysicalPlan + bestNames types.NameSlice + bestLogicalPlanCtx logicalPlanBuildCtx + checked bool + ) + var initialLogicalPlanCtx logicalPlanBuildCtx + if needRestoreLogicalPlanCtx { + initialLogicalPlanCtx = saveLogicalPlanBuildCtx(sessVars) + sessVars.StmtCtx.ResetAlternativeLogicalPlanSignals() + // Round 1 always uses the native FTSMysqlMatchAgainst builtin — same as + // the Alt-disabled default. The build records two signals on the + // planBuilder when MATCH...AGAINST is seen: + // * HasPredicateMatch: any direct-boolean-context MATCH. The round + // driver propagates this into stmtctx to trigger the + // fts-like-fallback alternative round, which builds a competing + // ILIKE-based plan; the cheaper of the two wins. + // * HasNonViableFTSMatch: a predicate-context MATCH whose native form + // cannot execute (no FTS index / no TiFlash replica / unsupported + // modifier). The round driver discards round 1's plan and forces + // AlternativeLogicalPlanFTSLikeFallback true so all subsequent + // rounds (correlate, etc.) re-rewrite with ILIKE. +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) } // build logical plan @@ -528,6 +803,7 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW return p, names, 0, nil } +<<<<<<< HEAD core.RecheckCTE(logic) // Handle the logical plan statement, use cascades planner if enabled. @@ -542,6 +818,85 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW sessVars.DurationOptimization = time.Since(beginOpt) return finalPlan, names, cost, err +======= + // Pre-compute which rounds are enabled based on the signals from the first + // (default) build. 
This prevents signal leakage: alternative rounds rebuild + // the plan and may set AlternativeLogicalPlan* signals as a side effect, + // which are not reset by restoreLogicalPlanBuildCtx. Evaluating enabled() + // upfront ensures each round's eligibility is determined solely by the + // original build's signals. + enabledRounds := make([]alternativeRound, 0, len(alternativeRounds)) + for _, round := range alternativeRounds { + if round.enabled(sessVars) { + enabledRounds = append(enabledRounds, round) + } + } + var lastAltRoundErr error + for _, round := range enabledRounds { + restoreLogicalPlanBuildCtx(sessVars, initialLogicalPlanCtx) + failpoint.Inject("failIfAlternativeLogicalPlanRoundTriggered", func(val failpoint.Value) { + if testSQL, ok := val.(string); ok && testSQL == node.Node.OriginalText() { + failpoint.Return(nil, nil, 0, errors.New("unexpected alternative logical plan round")) + } + }) + + // Use a closure so that defer-based cleanup runs at the end of each + // iteration, not at function exit. This ensures session state (e.g. + // EnableCorrelateSubquery) is restored even if the round panics. + func() { + if round.setup != nil { + round.setup(sessVars) + defer round.cleanup(sessVars) + } + p, names, nonLogical, err = buildAndOptimizeLogicalPlanRound( + ctx, + sctx, + node, + is, + hintProcessor, + &checked, + &optimizeStarted, + &beginOpt, + needRestoreLogicalPlanCtx, + &bestPlan, + &bestNames, + &bestCost, + &bestLogicalPlanCtx, + round.adjustFlag, + ) + }() + if err != nil { + // Alternative rounds are optional optimizations. If one fails, + // log and continue — the first round's plan is still valid in + // the general case. fts-like-fallback is the exception: the + // first round's plan may have been discarded as non-executable, + // so we remember the last alt-round error in case bestPlan + // remains nil after the loop. 
+ logutil.BgLogger().Warn("alternative logical plan round failed", + zap.String("round", round.name), + zap.Error(err)) + lastAltRoundErr = err + continue + } + if nonLogical { + return p, names, 0, nil + } + } + if bestPlan == nil { + if lastAltRoundErr != nil { + // No valid plan from any round. Surface the most recent alt-round + // error rather than the generic sentinel — typically this is the + // fts-like-fallback round reporting why MATCH...AGAINST cannot be + // rewritten (unsupported search string, etc.). + return nil, nil, 0, lastAltRoundErr + } + return nil, nil, 0, errors.New("failed to build logical plan") + } + if needRestoreLogicalPlanCtx { + restoreLogicalPlanBuildCtx(sessVars, bestLogicalPlanCtx) + } + return bestPlan, bestNames, bestCost, nil +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) } // OptimizeExecStmt to handle the "execute" statement diff --git a/pkg/planner/util/null_misc_test.go b/pkg/planner/util/null_misc_test.go new file mode 100644 index 0000000000000..0e2c1eb41f741 --- /dev/null +++ b/pkg/planner/util/null_misc_test.go @@ -0,0 +1,467 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package util + +import ( + "crypto/sha256" + "encoding/hex" + "strings" + "testing" + + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/sessionctx/vardef" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/mock" + "github.com/stretchr/testify/require" +) + +// TestNullRejectBuiltinRegistrySnapshot guards against silent builtin registry +// drift. When this hash breaks, the builtin set has changed — review whether +// new functions should be added to nullRejectNullPreservingFunctions or +// nullRejectRejectNullTests in null_misc_builtins.go. +func TestNullRejectBuiltinRegistrySnapshot(t *testing.T) { + names := expression.RegisteredBuiltinFunctionNames() + sum := sha256.Sum256([]byte(strings.Join(names, "\n"))) + + require.NotEmpty(t, names) + require.Equal(t, "729f5252bcd91efe1a4bbf0c383a36c5a2e52ed2d90d7aab0a3e0b450322294c", hex.EncodeToString(sum[:])) + + for name := range nullRejectRejectNullTests { + require.Contains(t, names, name) + } +} + +func TestIsNullRejectedProofModes(t *testing.T) { + sctx := mock.NewContext() + require.NoError(t, sctx.GetSessionVars().SetSystemVar(vardef.BlockEncryptionMode, "aes-128-ecb")) + exprCtx := sctx.GetExprCtx() + + innerA := newNullRejectIntColumn(1) + innerB := newNullRejectIntColumn(2) + outerC := newNullRejectIntColumn(3) + innerS := newNullRejectStringColumn(4) + innerUnsignedD := newNullRejectUintColumn(5) + innerSchema := expression.NewSchema(innerA, innerB, innerS, innerUnsignedD) + + gtInnerAZero := newNullRejectFunc(t, exprCtx, ast.GT, types.NewFieldType(mysql.TypeTiny), innerA, expression.NewZero()) + eqInnerAZero := newNullRejectFunc(t, exprCtx, ast.EQ, types.NewFieldType(mysql.TypeTiny), innerA, expression.NewZero()) + gtOuterCZero := newNullRejectFunc(t, exprCtx, ast.GT, types.NewFieldType(mysql.TypeTiny), outerC, expression.NewZero()) + likeWrappedInnerA := 
newNullRejectLike(t, exprCtx, expression.BuildCastFunction(exprCtx, innerA, types.NewFieldType(mysql.TypeVarString))) + coalesceInnerA := newNullRejectFunc(t, exprCtx, ast.Coalesce, types.NewFieldType(mysql.TypeLonglong), innerA, expression.NewOne()) + coalesceInnerATwo := newNullRejectFunc(t, exprCtx, ast.Coalesce, types.NewFieldType(mysql.TypeLonglong), innerA, newNullRejectIntConst(2)) + nullSafeEqInnerA := newNullRejectFunc(t, exprCtx, ast.NullEQ, types.NewFieldType(mysql.TypeTiny), innerA, expression.NewOne()) + fieldInnerA := newNullRejectFunc(t, exprCtx, ast.Field, types.NewFieldType(mysql.TypeLonglong), innerA, expression.NewOne()) + formatNullLocaleEq := newNullRejectFunc(t, exprCtx, ast.EQ, types.NewFieldType(mysql.TypeTiny), + newNullRejectFunc(t, exprCtx, ast.Format, types.NewFieldType(mysql.TypeVarString), + newNullRejectIntConst(12345), + newNullRejectIntConst(0), + innerS, + ), + newNullRejectStringConst("12,345"), + ) + quoteInnerSLikeA := newNullRejectFunc(t, exprCtx, ast.Like, types.NewFieldType(mysql.TypeTiny), + newNullRejectFunc(t, exprCtx, ast.Quote, types.NewFieldType(mysql.TypeVarString), innerS), + newNullRejectStringConst("A%"), + newNullRejectIntConst(92), + ) + issue66824LikePredicate := newNullRejectFunc(t, exprCtx, ast.GE, types.NewFieldType(mysql.TypeTiny), + expression.NewOne(), + newNullRejectFunc(t, exprCtx, ast.LogicAnd, types.NewFieldType(mysql.TypeTiny), + newNullRejectFunc(t, exprCtx, ast.LogicOr, types.NewFieldType(mysql.TypeTiny), innerA, expression.NewNull()), + newNullRejectFunc(t, exprCtx, ast.NE, types.NewFieldType(mysql.TypeTiny), outerC, outerC), + ), + ) + ifInnerANullThenZeroElseOuterC := newNullRejectFunc(t, exprCtx, ast.If, types.NewFieldType(mysql.TypeLonglong), + newNullRejectFunc(t, exprCtx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), innerA), + expression.NewZero(), + outerC, + ) + truncateUnsignedByNullableScale := newNullRejectFunc( + t, + exprCtx, + ast.Truncate, + 
newNullRejectUintFieldType(mysql.TypeLonglong), + newNullRejectUintConst(123), + innerUnsignedD, + ) + aesEncryptIgnoringNullableIV := newNullRejectFunc( + t, + exprCtx, + ast.AesEncrypt, + types.NewFieldType(mysql.TypeVarString), + newNullRejectStringConst("pingcap"), + newNullRejectStringConst("123"), + innerS, + ) + aesDecryptIgnoringNullableIV := newNullRejectFunc( + t, + exprCtx, + ast.AesDecrypt, + types.NewFieldType(mysql.TypeVarString), + newNullRejectFunc(t, exprCtx, ast.Unhex, types.NewFieldType(mysql.TypeVarString), newNullRejectStringConst("996E0CA8688D7AD20819B90B273E01C6")), + newNullRejectStringConst("123"), + innerS, + ) + jsonSetNullableValue := newNullRejectFunc( + t, + exprCtx, + ast.JSONSet, + types.NewFieldType(mysql.TypeJSON), + newNullRejectJSONConst(t, `{}`), + newNullRejectStringConst("$.a"), + innerS, + ) + jsonInsertNullableValue := newNullRejectFunc( + t, + exprCtx, + ast.JSONInsert, + types.NewFieldType(mysql.TypeJSON), + newNullRejectJSONConst(t, `{}`), + newNullRejectStringConst("$.a"), + innerS, + ) + jsonReplaceNullableValue := newNullRejectFunc( + t, + exprCtx, + ast.JSONReplace, + types.NewFieldType(mysql.TypeJSON), + newNullRejectJSONConst(t, `{"a": 1}`), + newNullRejectStringConst("$.a"), + innerS, + ) + jsonArrayAppendNullableValue := newNullRejectFunc( + t, + exprCtx, + ast.JSONArrayAppend, + types.NewFieldType(mysql.TypeJSON), + newNullRejectJSONConst(t, `[]`), + newNullRejectStringConst("$"), + innerS, + ) + jsonArrayInsertNullableValue := newNullRejectFunc( + t, + exprCtx, + ast.JSONArrayInsert, + types.NewFieldType(mysql.TypeJSON), + newNullRejectJSONConst(t, `[]`), + newNullRejectStringConst("$[0]"), + innerS, + ) + jsonMergePatchNullableDoc := newNullRejectFunc( + t, + exprCtx, + ast.JSONMergePatch, + types.NewFieldType(mysql.TypeJSON), + expression.BuildCastFunction(exprCtx, innerS, types.NewFieldType(mysql.TypeJSON)), + newNullRejectJSONConst(t, `null`), + newNullRejectJSONConst(t, `{"a": 1}`), + 
newNullRejectJSONConst(t, `[1, 2, 3]`), + ) + jsonSearchNullableEscape := newNullRejectFunc( + t, + exprCtx, + ast.JSONSearch, + types.NewFieldType(mysql.TypeJSON), + newNullRejectJSONConst(t, `["abc"]`), + newNullRejectStringConst("one"), + newNullRejectStringConst("abc"), + innerS, + ) + deferredInnerGTZero := newNullRejectDeferredConst(exprCtx, gtInnerAZero) + deferredCoalesceInnerATwoGTTwo := newNullRejectDeferredConst(exprCtx, + newNullRejectFunc(t, exprCtx, ast.GT, types.NewFieldType(mysql.TypeTiny), coalesceInnerATwo, newNullRejectIntConst(2)), + ) + deferredOneWithNullPlaceholder := newNullRejectDeferredConst(exprCtx, expression.NewOne()) + + cases := []struct { + name string + expr expression.Expression + expected bool + }{ + { + name: "or_needs_both_sides_non_true", + expr: newNullRejectFunc(t, exprCtx, ast.LogicOr, types.NewFieldType(mysql.TypeTiny), + gtInnerAZero, + newNullRejectFunc(t, exprCtx, ast.LogicAnd, types.NewFieldType(mysql.TypeTiny), eqInnerAZero, gtOuterCZero)), + expected: true, + }, + { + name: "not_uses_must_null", + expr: newNullRejectFunc(t, exprCtx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), gtInnerAZero), + expected: true, + }, + { + name: "is_null_accepts_null", + expr: newNullRejectFunc(t, exprCtx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), innerA), + expected: false, + }, + { + name: "is_true_rejects_null", + expr: newNullRejectFunc(t, exprCtx, ast.IsTruthWithNull, types.NewFieldType(mysql.TypeTiny), innerA), + expected: true, + }, + { + name: "not_is_null_rejects_null", + expr: newNullRejectFunc( + t, + exprCtx, + ast.UnaryNot, + types.NewFieldType(mysql.TypeTiny), + newNullRejectFunc(t, exprCtx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), innerA), + ), + expected: true, + }, + { + name: "null_preserving_wrapper_propagates_must_null", + expr: likeWrappedInnerA, + expected: true, + }, + { + name: "coalesce_constant_fallback_can_still_be_non_true", + expr: newNullRejectFunc(t, exprCtx, ast.GT, 
types.NewFieldType(mysql.TypeTiny), coalesceInnerATwo, newNullRejectIntConst(2)), + expected: true, + }, + { + name: "null_hiding_wrapper_stays_conservative", + expr: newNullRejectFunc(t, exprCtx, ast.GT, types.NewFieldType(mysql.TypeTiny), coalesceInnerA, expression.NewZero()), + expected: false, + }, + { + name: "in_with_all_list_items_null_rejected", + expr: newNullRejectFunc(t, exprCtx, ast.In, types.NewFieldType(mysql.TypeTiny), + expression.NewOne(), innerA, innerB), + expected: true, + }, + { + name: "in_with_non_null_candidate_is_not_proven", + expr: newNullRejectFunc(t, exprCtx, ast.In, types.NewFieldType(mysql.TypeTiny), + expression.NewOne(), innerA, expression.NewOne()), + expected: false, + }, + { + name: "null_safe_eq_with_non_null_constant_rejects_null", + expr: nullSafeEqInnerA, + expected: true, + }, + { + name: "format_with_null_locale_is_not_null_rejected", + expr: formatNullLocaleEq, + expected: false, + }, + { + name: "field_with_null_input_can_make_not_predicate_true", + expr: newNullRejectFunc(t, exprCtx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), + newNullRejectFunc(t, exprCtx, ast.GT, types.NewFieldType(mysql.TypeTiny), fieldInnerA, expression.NewZero())), + expected: false, + }, + { + name: "quote_with_null_input_can_make_not_predicate_true", + expr: newNullRejectFunc(t, exprCtx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), quoteInnerSLikeA), + expected: false, + }, + { + // Issue #66824: an outer comparison can still evaluate TRUE when the + // inner AND branch is merely nonTrue rather than mustNull. 
+ name: "comparison_over_non_true_and_is_not_null_rejected", + expr: issue66824LikePredicate, + expected: false, + }, + { + name: "if_condition_folded_after_nullification_stays_provable", + expr: newNullRejectFunc(t, exprCtx, ast.GT, types.NewFieldType(mysql.TypeTiny), ifInnerANullThenZeroElseOuterC, expression.NewZero()), + expected: true, + }, + { + name: "truncate_with_unsigned_nullable_scale_is_not_null_preserving", + expr: newNullRejectFunc( + t, + exprCtx, + ast.GT, + types.NewFieldType(mysql.TypeTiny), + truncateUnsignedByNullableScale, + expression.NewZero(), + ), + expected: false, + }, + { + name: "aes_encrypt_ignores_nullable_iv_in_ecb_mode", + expr: newNullRejectNotNull(t, exprCtx, aesEncryptIgnoringNullableIV), + expected: false, + }, + { + name: "aes_decrypt_ignores_nullable_iv_in_ecb_mode", + expr: newNullRejectNotNull(t, exprCtx, aesDecryptIgnoringNullableIV), + expected: false, + }, + { + name: "json_set_nullable_value_becomes_json_null", + expr: newNullRejectNotNull(t, exprCtx, jsonSetNullableValue), + expected: false, + }, + { + name: "json_insert_nullable_value_becomes_json_null", + expr: newNullRejectNotNull(t, exprCtx, jsonInsertNullableValue), + expected: false, + }, + { + name: "json_replace_nullable_value_becomes_json_null", + expr: newNullRejectNotNull(t, exprCtx, jsonReplaceNullableValue), + expected: false, + }, + { + name: "json_array_append_nullable_value_becomes_json_null", + expr: newNullRejectNotNull(t, exprCtx, jsonArrayAppendNullableValue), + expected: false, + }, + { + name: "json_array_insert_nullable_value_becomes_json_null", + expr: newNullRejectNotNull(t, exprCtx, jsonArrayInsertNullableValue), + expected: false, + }, + { + name: "json_merge_patch_nullable_argument_can_still_return_document", + expr: newNullRejectNotNull(t, exprCtx, jsonMergePatchNullableDoc), + expected: false, + }, + { + name: "json_search_nullable_escape_falls_back_to_default_escape", + expr: newNullRejectNotNull(t, exprCtx, jsonSearchNullableEscape), + 
expected: false, + }, + { + name: "deferred_expr_uses_symbolic_null_reject_proof", + expr: deferredInnerGTZero, + expected: true, + }, + { + name: "deferred_expr_skips_nullified_fold", + expr: deferredCoalesceInnerATwoGTTwo, + expected: false, + }, + { + name: "deferred_expr_does_not_classify_placeholder_null", + expr: deferredOneWithNullPlaceholder, + expected: false, + }, + } + + for _, tt := range cases { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.expected, IsNullRejected(sctx, innerSchema, tt.expr)) + }) + } +} + +func newNullRejectIntColumn(id int64) *expression.Column { + return &expression.Column{ + UniqueID: id, + ID: id, + Index: int(id), + RetType: types.NewFieldType(mysql.TypeLonglong), + } +} + +func newNullRejectStringColumn(id int64) *expression.Column { + return &expression.Column{ + UniqueID: id, + ID: id, + Index: int(id), + RetType: types.NewFieldType(mysql.TypeVarString), + } +} + +func newNullRejectUintColumn(id int64) *expression.Column { + return &expression.Column{ + UniqueID: id, + ID: id, + Index: int(id), + RetType: newNullRejectUintFieldType(mysql.TypeLonglong), + } +} + +func newNullRejectStringConst(value string) *expression.Constant { + return &expression.Constant{ + Value: types.NewStringDatum(value), + RetType: types.NewFieldType(mysql.TypeVarString), + } +} + +func newNullRejectIntConst(value int64) *expression.Constant { + return &expression.Constant{ + Value: types.NewIntDatum(value), + RetType: types.NewFieldType(mysql.TypeLonglong), + } +} + +func newNullRejectUintConst(value uint64) *expression.Constant { + return &expression.Constant{ + Value: types.NewUintDatum(value), + RetType: newNullRejectUintFieldType(mysql.TypeLonglong), + } +} + +// newNullRejectDeferredConst builds a deferred constant with a NULL placeholder value. 
+func newNullRejectDeferredConst(ctx expression.BuildContext, deferred expression.Expression) *expression.Constant { + return &expression.Constant{ + Value: types.NewDatum(nil), + RetType: deferred.GetType(ctx.GetEvalCtx()), + DeferredExpr: deferred, + } +} + +func newNullRejectUintFieldType(tp byte) *types.FieldType { + fieldType := types.NewFieldType(tp) + fieldType.AddFlag(mysql.UnsignedFlag) + return fieldType +} + +func newNullRejectFunc(t *testing.T, ctx expression.BuildContext, name string, retType *types.FieldType, args ...expression.Expression) expression.Expression { + expr, err := expression.NewFunction(ctx, name, retType, args...) + require.NoError(t, err) + return expr +} + +func newNullRejectJSONConst(t *testing.T, value string) *expression.Constant { + jsonValue, err := types.ParseBinaryJSONFromString(value) + require.NoError(t, err) + return &expression.Constant{ + Value: types.NewJSONDatum(jsonValue), + RetType: types.NewFieldType(mysql.TypeJSON), + } +} + +func newNullRejectNotNull(t *testing.T, ctx expression.BuildContext, arg expression.Expression) expression.Expression { + return newNullRejectFunc( + t, + ctx, + ast.UnaryNot, + types.NewFieldType(mysql.TypeTiny), + newNullRejectFunc(t, ctx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), arg), + ) +} + +func newNullRejectLike(t *testing.T, ctx expression.BuildContext, arg expression.Expression) expression.Expression { + return newNullRejectFunc( + t, + ctx, + ast.Like, + types.NewFieldType(mysql.TypeTiny), + newNullRejectFunc(t, ctx, ast.Trim, types.NewFieldType(mysql.TypeVarString), arg), + newNullRejectStringConst("1%"), + newNullRejectIntConst(92), + ) +} diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index 7e3feeb3732f0..0959b33bfa26d 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -446,6 +446,44 @@ type StatementContext struct { UseDynamicPruneMode bool // ColRefFromPlan mark the column ref used by assignment in 
update statement. ColRefFromUpdatePlan intset.FastIntSet +<<<<<<< HEAD +======= + // AlternativeLogicalPlanDecorrelatedApply indicates whether the current logical + // optimization round decorrelated at least one Apply into Join. + AlternativeLogicalPlanDecorrelatedApply bool + // AlternativeLogicalPlanSameOrderIndexJoin indicates whether the current first + // round already produced a same-order index join candidate for a decorrelated Apply. + AlternativeLogicalPlanSameOrderIndexJoin bool + // AlternativeLogicalPlanOrderAwareJoinReorder indicates whether at least one + // logical build round produced an order-aware join reorder candidate that is + // worth exploring in a dedicated alternative round. + AlternativeLogicalPlanOrderAwareJoinReorder bool + // AlternativeLogicalPlanPreferCorrelate indicates whether the current logical + // build round encountered a non-correlated IN subquery eligible for the + // correlate-to-Apply alternative. + AlternativeLogicalPlanPreferCorrelate bool + // AlternativeLogicalPlanFTSLikeFallback is a mode flag controlling how the + // expression rewriter handles MATCH...AGAINST in predicate contexts. When + // false (the default, matching Alt-disabled behavior) the rewriter emits + // the native FTSMysqlMatchAgainst builtin. When true, the rewriter emits + // ILIKE-based predicates instead. + // + // Round 1 always runs with this flag false. The "fts-like-fallback" + // alternative round flips it to true (via its setup/cleanup) while it + // builds a competing ILIKE-based plan; the cost-cheapest plan wins via the + // normal alt-rounds cost comparison. If round 1's build records a + // predicate-context MATCH that cannot be served natively (no FTS index on a + // matched column / no TiFlash replica / modifier not pushdown-supported), + // optimize.go additionally invalidates round 1's plan and forces this flag + // true outside the round so any intervening rounds (correlate, etc.) also + // produce executable LIKE-based plans. 
+ AlternativeLogicalPlanFTSLikeFallback bool + // AlternativeLogicalPlanHasPredicateContextMatch indicates that round 1 + // encountered a direct-boolean-context MATCH...AGAINST. The round driver + // uses this to enable the fts-like-fallback round for cost competition even + // when round 1's native plan is executable. + AlternativeLogicalPlanHasPredicateContextMatch bool +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) // IsExplainAnalyzeDML is true if the statement is "explain analyze DML executors", before responding the explain // results to the client, the transaction should be committed first. See issue #37373 for more details. @@ -572,6 +610,83 @@ func (sc *StatementContext) Reset() bool { return true } +<<<<<<< HEAD +======= +// SaveLogicalPlanBuildState captures the statement-scoped planner state before building +// another logical plan candidate from the same AST. +func (sc *StatementContext) SaveLogicalPlanBuildState() LogicalPlanBuildState { + planCacheUseCache, planCacheType, planCacheUnqualified, planCacheForce, planCacheAlwaysWarn := sc.PlanCacheTracker.Save() + return LogicalPlanBuildState{ + warnings: slices.Clone(sc.GetWarnings()), + extraWarnings: slices.Clone(sc.GetExtraWarnings()), + tables: slices.Clone(sc.Tables), + tableStats: maps.Clone(sc.TableStats), + lockTableIDs: maps.Clone(sc.LockTableIDs), + tblInfo2UnionScan: maps.Clone(sc.TblInfo2UnionScan), + useDynamicPruneMode: sc.UseDynamicPruneMode, + viewDepth: sc.ViewDepth, + colRefFromUpdatePlan: sc.ColRefFromUpdatePlan.Copy(), + planCacheUseCache: planCacheUseCache, + planCacheType: planCacheType, + planCacheUnqualified: planCacheUnqualified, + planCacheForce: planCacheForce, + planCacheAlwaysWarn: planCacheAlwaysWarn, + } +} + +// RestoreLogicalPlanBuildState restores the statement-scoped planner state after a +// discarded logical plan build attempt. 
+func (sc *StatementContext) RestoreLogicalPlanBuildState(state LogicalPlanBuildState) { + sc.SetWarnings(slices.Clone(state.warnings)) + sc.SetExtraWarnings(slices.Clone(state.extraWarnings)) + sc.Tables = slices.Clone(state.tables) + sc.TableStats = maps.Clone(state.tableStats) + sc.LockTableIDs = maps.Clone(state.lockTableIDs) + sc.TblInfo2UnionScan = maps.Clone(state.tblInfo2UnionScan) + sc.UseDynamicPruneMode = state.useDynamicPruneMode + sc.ViewDepth = state.viewDepth + sc.ColRefFromUpdatePlan.CopyFrom(state.colRefFromUpdatePlan) + sc.PlanCacheTracker.Restore(state.planCacheUseCache, state.planCacheType, state.planCacheUnqualified, state.planCacheForce, state.planCacheAlwaysWarn) + sc.RangeFallbackHandler = contextutil.NewRangeFallbackHandler(&sc.PlanCacheTracker, sc) +} + +// ResetAlternativeLogicalPlanSignals clears the statement-local signals used by the +// alternative logical plan feature. +func (sc *StatementContext) ResetAlternativeLogicalPlanSignals() { + sc.AlternativeLogicalPlanDecorrelatedApply = false + sc.AlternativeLogicalPlanSameOrderIndexJoin = false + sc.AlternativeLogicalPlanOrderAwareJoinReorder = false + sc.AlternativeLogicalPlanFTSLikeFallback = false + sc.AlternativeLogicalPlanHasPredicateContextMatch = false + sc.AlternativeLogicalPlanPreferCorrelate = false +} + +// MarkAlternativeLogicalPlanDecorrelatedApply records that at least one Apply has +// been decorrelated into a Join in the current round. +func (sc *StatementContext) MarkAlternativeLogicalPlanDecorrelatedApply() { + sc.AlternativeLogicalPlanDecorrelatedApply = true +} + +// MarkAlternativeLogicalPlanSameOrderIndexJoin records that the current first round +// has already produced a same-order index join candidate for a decorrelated Apply. 
+func (sc *StatementContext) MarkAlternativeLogicalPlanSameOrderIndexJoin() { + sc.AlternativeLogicalPlanSameOrderIndexJoin = true +} + +// MarkAlternativeLogicalPlanOrderAwareJoinReorder records that the current +// logical build round produced an order-aware join reorder candidate. +func (sc *StatementContext) MarkAlternativeLogicalPlanOrderAwareJoinReorder() { + sc.AlternativeLogicalPlanOrderAwareJoinReorder = true +} + +// MarkAlternativeLogicalPlanPreferCorrelate records that the current logical +// build round encountered a non-correlated IN subquery that is eligible for +// the correlate-to-Apply alternative. +func (sc *StatementContext) MarkAlternativeLogicalPlanPreferCorrelate() { + sc.AlternativeLogicalPlanPreferCorrelate = true +} + +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) // CtxID returns the context id of the statement func (sc *StatementContext) CtxID() uint64 { return sc.ctxID diff --git a/tests/integrationtest/r/executor/show.result b/tests/integrationtest/r/executor/show.result index 77191bc3a6ca6..5973992a00610 100644 --- a/tests/integrationtest/r/executor/show.result +++ b/tests/integrationtest/r/executor/show.result @@ -756,7 +756,11 @@ ltrim make_set makedate maketime +<<<<<<< HEAD master_pos_wait +======= +match_against +>>>>>>> f96cd1c2fd5 (planner: rewrite FTS predicates to LIKE for evaluation of non-TiCI query plan (#65626)) md5 microsecond mid diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result new file mode 100644 index 0000000000000..30e7acaf517ea --- /dev/null +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -0,0 +1,240 @@ +set tidb_cost_model_version=1; +set @@tidb_opt_enable_alternative_logical_plans=ON; +drop table if exists articles; +create table articles (id int primary key, title varchar(200), body text); +insert into articles values +(1, 'MySQL Tutorial', 
'This tutorial provides a basic MySQL tutorial'), +(2, 'How To Use MySQL Well', 'After you went through a MySQL tutorial'), +(3, 'Optimizing MySQL', 'In this tutorial we will show how to optimize MySQL'), +(4, 'MySQL vs. PostgreSQL', 'This article compares MySQL and PostgreSQL'), +(5, 'MySQL Security', 'How to secure your MySQL database'); +select id, title from articles where match(title) against('MySQL tutorial'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title, body) against('MySQL tutorial'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('+MySQL +tutorial' in boolean mode); +id title +1 MySQL Tutorial +select id, title from articles where match(title) against('+MySQL -tutorial' in boolean mode); +id title +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. 
PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('Optim*' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'Optim*' is not supported in the LIKE fallback' +select id, title from articles where match(title, body) against('"MySQL tutorial"' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title, body) against('+MySQL +database -PostgreSQL' in boolean mode); +id title +5 MySQL Security +select id, title from articles where match(title) against('tutorial security' in boolean mode); +id title +1 MySQL Tutorial +5 MySQL Security +select id, title from articles where match(title) against(''); +id title +set @@tidb_opt_enable_alternative_logical_plans=OFF; +select id, title from articles where match(title) against('MySQL'); +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +set @@tidb_opt_enable_alternative_logical_plans=ON; +select id, title from articles where match(title) against('MySQL'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('PostgreSQL'); +id title +4 MySQL vs. 
PostgreSQL +drop table if exists special_chars; +create table special_chars (id int primary key, content varchar(200)); +insert into special_chars values +(1, 'Progress is at 100%'), +(2, 'Progress is at 50%'), +(3, 'File name is test_file.txt'), +(4, 'Path is C:\\Windows\\System32'), +(5, 'Normal text without special chars'); +select id, content from special_chars where match(content) against('100%'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '100%' is not supported in the LIKE fallback' +select id, content from special_chars where match(content) against('test_file'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'test_file' is not supported in the LIKE fallback' +select id, content from special_chars where match(content) against('C:\\Windows'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'C:\Windows' is not supported in the LIKE fallback' +select id, content from special_chars where match(content) against('+100% +Progress' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+100%' is not supported in the LIKE fallback' +drop table if exists special_chars; +select id, title from articles where match(title) against('-PostgreSQL -Security' in boolean mode); +id title +select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('+MySQL +tutorial +-Security' in boolean mode); +id title +1 MySQL Tutorial +select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+*' is not supported in the LIKE fallback' +select id, 
title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); +id title +1 MySQL Tutorial +3 Optimizing MySQL +select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"How' is not supported in the LIKE fallback' +select id, title from articles where match(title) against(' +'); +id title +select id, title from articles where match(title) against('MySQL tutorial PostgreSQL'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('+"MySQL Tutorial"' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+"MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '-"MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+"How' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('tutorial -Security' in boolean mode); +id title +1 MySQL Tutorial +select id, title from articles where match(title) against('tutorial PostgreSQL -Security' in boolean mode); +id title +1 MySQL Tutorial +4 MySQL vs. 
PostgreSQL +select id, title from articles where match(title) against('MySQL, PostgreSQL.'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'MySQL,' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('>MySQL MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title) against(NULL); +id title +select id, title from articles where not match(title) against(NULL); +id title +select id, title from articles where (match(title) against(NULL)); +id title +select id, title from articles where match(title) against('~Security ~PostgreSQL' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '~Security' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('MySQL' with query expansion); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback' +select id, match(title) against('MySQL') as score from articles; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles order by match(title) against('MySQL') desc; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles group by id, title having match(title) against('PostgreSQL'); +id title +4 MySQL vs. 
PostgreSQL +select a.id, a.title from articles a inner join articles a2 on a.id = a2.id and match(a.title) against('Security'); +id title +5 MySQL Security +select id from articles where match(id) against('MySQL'); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id from articles where match(id) against('xx-yy'); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id from articles where match(id) against(NULL); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id, title from articles +where match(title) against('MySQL') and match(body) against('PostgreSQL'); +id title +4 MySQL vs. PostgreSQL +select id, title from articles where not match(title) against('MySQL'); +id title +select id, title from articles where (match(title) against('MySQL')); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where (match(title) against('MySQL')) is null; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (match(title) against('MySQL')) > 0.5; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (match(title) against('MySQL')) = 0; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (case when match(title) against('MySQL') then 1 else 0 end) = 1; +Error 1105 (HY000): cannot use 'MATCH ... 
AGAINST' outside of fulltext index +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_lit from 'select id, title from articles where match(title) against(''MySQL'')'; +execute st_fts_lit; +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +execute st_fts_lit; +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select @@last_plan_from_cache; +@@last_plan_from_cache +1 +deallocate prepare st_fts_lit; +set @@tidb_enable_prepared_plan_cache=DEFAULT; +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts from 'select id, title from articles where match(title) against(?)'; +set @q='MySQL'; +execute st_fts using @q; +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +execute st_fts using @q; +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select @@last_plan_from_cache; +@@last_plan_from_cache +0 +set @q='PostgreSQL'; +execute st_fts using @q; +id title +4 MySQL vs. PostgreSQL +deallocate prepare st_fts; +set @@tidb_enable_prepared_plan_cache=DEFAULT; +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_null from 'select id, title from articles where match(title) against(?)'; +set @q = NULL; +execute st_fts_null using @q; +id title +set @q = 'PostgreSQL'; +execute st_fts_null using @q; +id title +4 MySQL vs. 
PostgreSQL +select @@last_plan_from_cache; +@@last_plan_from_cache +0 +deallocate prepare st_fts_null; +set @@tidb_enable_prepared_plan_cache=DEFAULT; +select id, match(title) against('+MySQL' in boolean mode) as score from articles; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +select id, title from articles order by match(title) against('+MySQL' in boolean mode) desc; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +select id, title from articles where (match(title) against('+MySQL' in boolean mode)) is null; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +set @@tidb_opt_enable_alternative_logical_plans=OFF; +select id, title from articles where match(title) against('+MySQL' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +set @@tidb_opt_enable_alternative_logical_plans=ON; +set @@tidb_opt_enable_alternative_logical_plans=OFF; +drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test new file mode 100644 index 0000000000000..f1b64c52ce563 --- /dev/null +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -0,0 +1,321 @@ +# Test cases for MATCH...AGAINST to LIKE conversion via alternative logical plans + +# Setup +set tidb_cost_model_version=1; +set @@tidb_opt_enable_alternative_logical_plans=ON; +drop table if exists articles; +create table articles (id int primary key, title varchar(200), body text); +insert into articles 
values + (1, 'MySQL Tutorial', 'This tutorial provides a basic MySQL tutorial'), + (2, 'How To Use MySQL Well', 'After you went through a MySQL tutorial'), + (3, 'Optimizing MySQL', 'In this tutorial we will show how to optimize MySQL'), + (4, 'MySQL vs. PostgreSQL', 'This article compares MySQL and PostgreSQL'), + (5, 'MySQL Security', 'How to secure your MySQL database'); + +# Test 1: Natural Language Mode - Single Column +select id, title from articles where match(title) against('MySQL tutorial'); + +# Test 2: Natural Language Mode - Multiple Columns +select id, title from articles where match(title, body) against('MySQL tutorial'); + +# Test 3: Boolean Mode - Required Terms +select id, title from articles where match(title) against('+MySQL +tutorial' in boolean mode); + +# Test 4: Boolean Mode - Excluded Terms +select id, title from articles where match(title) against('+MySQL -tutorial' in boolean mode); + +# Test 5: Boolean Mode - Prefix Wildcard is rejected by the strict subset (LIKE +# cannot enforce word-start boundaries; MySQL FTS would only match words +# starting with the prefix). Falls back to native FTS path; without an FTS +# index, surfaces the rewrite error. +-- error 1235 +select id, title from articles where match(title) against('Optim*' in boolean mode); + +# Test 6: Boolean Mode - Exact Phrase is rejected by the strict subset (LIKE +# cannot enforce word boundaries inside a phrase). Falls back to native FTS +# path; without an FTS index, surfaces the rewrite error. 
+-- error 1235 +select id, title from articles where match(title, body) against('"MySQL tutorial"' in boolean mode); + +# Test 7: Boolean Mode - Complex Query +select id, title from articles where match(title, body) against('+MySQL +database -PostgreSQL' in boolean mode); + +# Test 8: Boolean Mode - Optional Terms +select id, title from articles where match(title) against('tutorial security' in boolean mode); + +# Test 9: Empty Search String +select id, title from articles where match(title) against(''); + +# Test 10: Test without alternative plans (native FTS path - errors without TiFlash) +set @@tidb_opt_enable_alternative_logical_plans=OFF; +-- error 1105 +select id, title from articles where match(title) against('MySQL'); + +# Test 11: Switch back to alternative plans mode +set @@tidb_opt_enable_alternative_logical_plans=ON; +select id, title from articles where match(title) against('MySQL'); + +# Test 12: Natural Language Mode with single word +select id, title from articles where match(title) against('PostgreSQL'); + +# Test 13: Special characters in search strings are rejected by the strict +# subset (MySQL FTS treats %, _, \, : etc. as word separators or operators, +# so a substring LIKE on them would produce results inconsistent with MySQL +# FTS tokenization). Each rejection falls back to the native FTS path; without +# an FTS index, surfaces the rewrite error. 
+drop table if exists special_chars; +create table special_chars (id int primary key, content varchar(200)); +insert into special_chars values + (1, 'Progress is at 100%'), + (2, 'Progress is at 50%'), + (3, 'File name is test_file.txt'), + (4, 'Path is C:\\Windows\\System32'), + (5, 'Normal text without special chars'); + +-- error 1235 +select id, content from special_chars where match(content) against('100%'); + +-- error 1235 +select id, content from special_chars where match(content) against('test_file'); + +-- error 1235 +select id, content from special_chars where match(content) against('C:\\Windows'); + +-- error 1235 +select id, content from special_chars where match(content) against('+100% +Progress' in boolean mode); + +drop table if exists special_chars; + +# Test 14: Boolean mode - only excluded terms (no required/optional) +select id, title from articles where match(title) against('-PostgreSQL -Security' in boolean mode); + +# Test 15: Boolean mode - quote is rejected by the strict subset. +-- error 1235 +select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); + +# Test 16: Boolean mode - mixed whitespace (tabs and newlines) +select id, title from articles where match(title) against('+MySQL +tutorial +-Security' in boolean mode); + +# Test 17: Boolean mode - `*` is rejected by the strict subset. +-- error 1235 +select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); + +# Test 18: Boolean mode - multiple excluded terms +select id, title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); + +# Test 19: Boolean mode - mixed `*` and quoted phrase are rejected. 
+-- error 1235 +select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); + +# Test 20: Natural language mode - only whitespace +select id, title from articles where match(title) against(' + '); + +# Test 21: Natural language mode - multiple spaces between words +select id, title from articles where match(title) against('MySQL tutorial PostgreSQL'); + +# Test 22: Boolean mode - required phrase rejected by strict subset. +-- error 1235 +select id, title from articles where match(title) against('+"MySQL Tutorial"' in boolean mode); + +# Test 23: Boolean mode - excluded phrase rejected by strict subset. +-- error 1235 +select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); + +# Test 24: Boolean mode - phrase mixed with words rejected by strict subset. +-- error 1235 +select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); + +# Test 25: Boolean mode - optional + excluded (optional treated as required filter) +select id, title from articles where match(title) against('tutorial -Security' in boolean mode); + +# Test 26: Boolean mode - optional + excluded with multiple optionals +select id, title from articles where match(title) against('tutorial PostgreSQL -Security' in boolean mode); + +# Test 27: Natural language mode - punctuation in tokens rejected by strict +# subset. MySQL FTS would tokenize away the punctuation, but a substring LIKE +# would include it, so we refuse the rewrite. +-- error 1235 +select id, title from articles where match(title) against('MySQL, PostgreSQL.'); + +# Test 28: Boolean mode - relevance modifiers > < rejected by strict subset. +-- error 1235 +select id, title from articles where match(title) against('>MySQL 0.5; + +# Test 36g: Scalar-position MATCH — explicit comparison to 0. Native returns +# the score (≥0). 
Coincidentally a LIKE 0/1 result agrees on "no match" +# rows, but we still route to native to preserve relevance-score semantics +# uniformly across scalar positions. +-- error 1105 +select id, title from articles where (match(title) against('MySQL')) = 0; + +# Test 36h: Scalar-position MATCH inside CASE WHEN. The WHEN expression takes +# a boolean condition, but the MATCH is buried under the CASE node, which is +# a non-boolean ancestor. Falls through to native. +-- error 1105 +select id, title from articles where (case when match(title) against('MySQL') then 1 else 0 end) = 1; + +# Test 37: Plan cache - prepared statement with literal AGAINST IS cacheable. +# The LIKE rewrite bakes the search string into pattern constants; for a true +# literal those constants are stable across executions, so the plan must be +# cacheable. Verifies the LIKE fallback only skips plan cache when the AGAINST +# constant is mutable (param marker / deferred expr), not for plain literals. +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_lit from 'select id, title from articles where match(title) against(''MySQL'')'; +execute st_fts_lit; +execute st_fts_lit; +select @@last_plan_from_cache; +deallocate prepare st_fts_lit; +set @@tidb_enable_prepared_plan_cache=DEFAULT; + +# Test 38: Plan cache - prepared statement with ? in AGAINST must NOT cache. +# A param marker is mutable across executions; baking the first execution's +# pattern would silently produce wrong results when the bind value changes. +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts from 'select id, title from articles where match(title) against(?)'; +set @q='MySQL'; +execute st_fts using @q; +execute st_fts using @q; +select @@last_plan_from_cache; +# Bind a different value to confirm results stay correct under the non-cached plan. 
+set @q='PostgreSQL'; +execute st_fts using @q; +deallocate prepare st_fts; +set @@tidb_enable_prepared_plan_cache=DEFAULT; + +# Test 38a: Prepared statement with a NULL first bind followed by a non-NULL +# bind. Pre-fix the LIKE fallback's NULL fast-path emitted Constant(0) and +# ran BEFORE the plan-cache skip check, so the prepared plan could cache a +# constant-false plan that a later non-NULL bind would silently reuse, +# returning zero rows instead of the matching ones. The fix moves the +# plan-cache skip ahead of the NULL fast-path (so mutable AGAINST always +# disables caching) and changes the NULL emission to Constant(NULL) (so +# NULL three-valued logic is preserved). After the fix, the second execute +# must return the PostgreSQL row, and @@last_plan_from_cache must be 0. +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_null from 'select id, title from articles where match(title) against(?)'; +set @q = NULL; +execute st_fts_null using @q; +set @q = 'PostgreSQL'; +execute st_fts_null using @q; +select @@last_plan_from_cache; +deallocate prepare st_fts_null; +set @@tidb_enable_prepared_plan_cache=DEFAULT; + +# Note: user variables in AGAINST (e.g., AGAINST(@search)) are rejected at +# rewrite time as a non-constant search string, so they never reach the +# plan-cache decision and need no separate cache-skip coverage here. + +# Test 39: Non-default modifier in a scoring context (SELECT field). LIKE +# cannot produce a float relevance score so it can't rescue this; the modifier +# guard in matchAgainstToBuiltin must error at plan time rather than emit a +# native FTS expression that TiFlash would silently execute as natural-language +# mode (the tipb pushdown protocol drops the modifier). +-- error 1235 +select id, match(title) against('+MySQL' in boolean mode) as score from articles; + +# Test 40: Non-default modifier in ORDER BY (scoring context). Same guard. 
+-- error 1235 +select id, title from articles order by match(title) against('+MySQL' in boolean mode) desc; + +# Test 41: Non-default modifier in a scalar predicate position (IS NULL). +# Even with alternative logical plans enabled, the LIKE round only rewrites +# direct-boolean MATCHes; the scalar position falls through to native, which +# must reject the modifier rather than mistranslate it on TiFlash. +-- error 1235 +select id, title from articles where (match(title) against('+MySQL' in boolean mode)) is null; + +# Test 42: Boolean mode in WHERE with alternative logical plans disabled. +# Without the fts-like-fallback rescue, native is the final plan, so the +# modifier guard must fire. (Pre-PR this query would push to TiFlash and +# silently execute as natural-language mode.) +set @@tidb_opt_enable_alternative_logical_plans=OFF; +-- error 1235 +select id, title from articles where match(title) against('+MySQL' in boolean mode); +set @@tidb_opt_enable_alternative_logical_plans=ON; + +# Cleanup +set @@tidb_opt_enable_alternative_logical_plans=OFF; +drop table if exists articles;