diff --git a/.gitignore b/.gitignore index 1bd76a0654693..846b3c01f7e02 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,7 @@ var # Personal config files /*config.toml .cache + +# Claude Code runtime state (per-user, not part of repo) +.claude/scheduled_tasks.lock +.claude/settings.local.json diff --git a/cmd/tidb-server/BUILD.bazel b/cmd/tidb-server/BUILD.bazel index 4ff8cdac2f33f..039c810a79eb3 100644 --- a/cmd/tidb-server/BUILD.bazel +++ b/cmd/tidb-server/BUILD.bazel @@ -107,7 +107,7 @@ go_test( srcs = ["main_test.go"], embed = [":tidb-server_lib"], flaky = True, - shard_count = 6, + shard_count = 7, deps = [ "//pkg/config", "//pkg/config/deploymode", diff --git a/pkg/expression/BUILD.bazel b/pkg/expression/BUILD.bazel index 3bd1edf787ac1..eb0bb0e32871d 100644 --- a/pkg/expression/BUILD.bazel +++ b/pkg/expression/BUILD.bazel @@ -65,6 +65,7 @@ go_library( "expression.go", "extension.go", "fts_helper.go", + "fts_to_like.go", "function_traits.go", "grouping_sets.go", "helper.go", @@ -199,6 +200,7 @@ go_test( "evaluator_test.go", "expr_to_pb_test.go", "expression_test.go", + "fts_to_like_test.go", "function_traits_test.go", "grouping_sets_test.go", "helper_test.go", diff --git a/pkg/expression/builtin.go b/pkg/expression/builtin.go index 86e3bfbbca639..7a2256733034a 100644 --- a/pkg/expression/builtin.go +++ b/pkg/expression/builtin.go @@ -980,7 +980,8 @@ var funcs = map[string]functionClass{ ast.VecAsText: &vecAsTextFunctionClass{baseFunctionClass{ast.VecAsText, 1, 1}}, // fts functions - ast.FTSMatchWord: &ftsMatchWordFunctionClass{baseFunctionClass{ast.FTSMatchWord, 2, 2}}, + ast.FTSMatchWord: &ftsMatchWordFunctionClass{baseFunctionClass{ast.FTSMatchWord, 2, 2}}, + ast.FTSMysqlMatchAgainst: &ftsMysqlMatchAgainstFunctionClass{baseFunctionClass{ast.FTSMysqlMatchAgainst, 2, -1}}, // TiDB internal function. 
ast.TiDBDecodeKey: &tidbDecodeKeyFunctionClass{baseFunctionClass{ast.TiDBDecodeKey, 1, 1}}, diff --git a/pkg/expression/builtin_fts.go b/pkg/expression/builtin_fts.go index 15cef850f05b2..430b0dabb371e 100644 --- a/pkg/expression/builtin_fts.go +++ b/pkg/expression/builtin_fts.go @@ -16,6 +16,7 @@ package expression import ( "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tipb/go-tipb" @@ -23,10 +24,12 @@ import ( var ( _ functionClass = &ftsMatchWordFunctionClass{} + _ functionClass = &ftsMysqlMatchAgainstFunctionClass{} ) var ( _ builtinFunc = &builtinFtsMatchWordSig{} + _ builtinFunc = &builtinFtsMysqlMatchAgainstSig{} ) type ftsMatchWordFunctionClass struct { @@ -37,12 +40,43 @@ type builtinFtsMatchWordSig struct { baseBuiltinFunc } +type ftsMysqlMatchAgainstFunctionClass struct { + baseFunctionClass +} + +type builtinFtsMysqlMatchAgainstSig struct { + baseBuiltinFunc + modifier ast.FulltextSearchModifier +} + func (b *builtinFtsMatchWordSig) Clone() builtinFunc { newSig := &builtinFtsMatchWordSig{} newSig.cloneFrom(&b.baseBuiltinFunc) return newSig } +func (b *builtinFtsMysqlMatchAgainstSig) Clone() builtinFunc { + newSig := &builtinFtsMysqlMatchAgainstSig{} + newSig.cloneFrom(&b.baseBuiltinFunc) + newSig.modifier = b.modifier + return newSig +} + +func (b *builtinFtsMysqlMatchAgainstSig) SetModifier(modifier ast.FulltextSearchModifier) { + b.modifier = modifier +} + +// SetFTSMysqlMatchAgainstModifier sets the modifier for the internal `MATCH ... AGAINST` builtin signature. +// It is expected to be called by planner right after building the scalar function. 
+func SetFTSMysqlMatchAgainstModifier(sf *ScalarFunction, modifier ast.FulltextSearchModifier) error { + sig, ok := sf.Function.(*builtinFtsMysqlMatchAgainstSig) + if !ok { + return errors.Errorf("unexpected builtin signature for %s: %T", ast.FTSMysqlMatchAgainst, sf.Function) + } + sig.SetModifier(modifier) + return nil +} + func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { if err := c.verifyArgs(args); err != nil { return nil, err @@ -81,3 +115,53 @@ func (b *builtinFtsMatchWordSig) evalReal(ctx EvalContext, row chunk.Row) (float // Reject executing match against in TiDB side. return 0, false, errors.Errorf("cannot use 'FTS_MATCH_WORD()' outside of fulltext index") } + +func (c *ftsMysqlMatchAgainstFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { + if err := c.verifyArgs(args); err != nil { + return nil, err + } + + argAgainst := args[0] + argAgainstConstant, ok := argAgainst.(*Constant) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") + } + if argAgainstConstant.Value.Kind() != types.KindString && !argAgainstConstant.Value.IsNull() { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-string constant") + } + + argsMatch := args[1:] + for _, arg := range argsMatch { + _, ok := arg.(*Column) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("not matching a column") + } + } + + argTps := make([]types.EvalType, 0, len(args)) + argTps = append(argTps, types.ETString) + for _, arg := range argsMatch { + if arg.GetType(ctx.GetEvalCtx()).EvalType() != types.ETString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") + } + argTps = append(argTps, types.ETString) + } + + bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETReal, argTps...) 
+ if err != nil { + return nil, err + } + + sig := &builtinFtsMysqlMatchAgainstSig{baseBuiltinFunc: bf} + sig.setPbCode(tipb.ScalarFuncSig_FTSMatchExpression) + return sig, nil +} + +func (b *builtinFtsMysqlMatchAgainstSig) evalReal(ctx EvalContext, row chunk.Row) (float64, bool, error) { + // args[0] is validated to be a *Constant by getFunction; guard defensively + // since the sig may be reconstructed via the distsql path without that check. + if constArg, ok := b.args[0].(*Constant); ok && constArg.Value.IsNull() { + return 0, true, nil + } + return 0, false, errors.Errorf("cannot use 'MATCH ... AGAINST' outside of fulltext index") +} diff --git a/pkg/expression/builtin_threadunsafe_generated.go b/pkg/expression/builtin_threadunsafe_generated.go index 6f28645e4a349..2140fab768df1 100644 --- a/pkg/expression/builtin_threadunsafe_generated.go +++ b/pkg/expression/builtin_threadunsafe_generated.go @@ -81,6 +81,11 @@ func (s *builtinValidatePasswordStrengthSig) SafeToShareAcrossSession() bool { return false } +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinFtsMysqlMatchAgainstSig) SafeToShareAcrossSession() bool { + return false +} + // SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. func (s *builtinIlikeSig) SafeToShareAcrossSession() bool { return false diff --git a/pkg/expression/distsql_builtin.go b/pkg/expression/distsql_builtin.go index f942c037f1463..14f82e1b9609e 100644 --- a/pkg/expression/distsql_builtin.go +++ b/pkg/expression/distsql_builtin.go @@ -1158,6 +1158,13 @@ func getSignatureByPB(ctx BuildContext, sigCode tipb.ScalarFuncSig, tp *tipb.Fie f = &builtinVecL2NormSig{base} case tipb.ScalarFuncSig_FTSMatchWord: f = &builtinFtsMatchWordSig{base} + case tipb.ScalarFuncSig_FTSMatchExpression: + // NOTE: builtinFtsMysqlMatchAgainstSig.modifier is not serialized in the + // protobuf encoding because the tipb schema has no FTS metadata message. 
+ // The reconstructed sig therefore uses the zero modifier value + // (FulltextSearchModifierNaturalLanguageMode). TiFlash must derive the + // search mode from other context when executing this expression. + f = &builtinFtsMysqlMatchAgainstSig{baseBuiltinFunc: base} default: e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode) return nil, e diff --git a/pkg/expression/fts_to_like.go b/pkg/expression/fts_to_like.go new file mode 100644 index 0000000000000..19e46dbbbe075 --- /dev/null +++ b/pkg/expression/fts_to_like.go @@ -0,0 +1,438 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package expression + +import ( + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" +) + +// ftsSearchTerm represents a single token in a boolean-mode FTS search string +// surviving the strict-subset validator: a plain alphanumeric word optionally +// prefixed with `+` (required) or `-` (excluded). +type ftsSearchTerm struct { + word string + isRequired bool + isExcluded bool +} + +// parseFTSBooleanSearchString splits a boolean-mode search string into terms. +// Inputs reach this function only after ValidateFTSSearchStringForLikeFallback +// has accepted them, so every whitespace-separated field is either a bare +// alphanumeric word or `+word`/`-word`. 
+func parseFTSBooleanSearchString(text string) []ftsSearchTerm { + fields := strings.Fields(text) + if len(fields) == 0 { + return nil + } + terms := make([]ftsSearchTerm, 0, len(fields)) + for _, w := range fields { + terms = append(terms, parseFTSSearchTerm(w)) + } + return terms +} + +// parseFTSSearchTerm parses a single boolean-mode token. The strict-subset +// validator guarantees `word`, `+word`, or `-word` with an alphanumeric body, +// so only the leading operator needs interpretation. +func parseFTSSearchTerm(word string) ftsSearchTerm { + if word == "" { + return ftsSearchTerm{} + } + switch word[0] { + case '+': + return ftsSearchTerm{word: word[1:], isRequired: true} + case '-': + return ftsSearchTerm{word: word[1:], isExcluded: true} + } + return ftsSearchTerm{word: word} +} + +// isFTSWordByte returns true for alphanumeric ASCII and non-ASCII bytes. +// Punctuation including underscore is NOT a word character, consistent with +// MySQL's built-in FTS tokenizer which treats _ as a word separator. Used by +// ValidateFTSSearchStringForLikeFallback to gate the LIKE rewrite. +func isFTSWordByte(c byte) bool { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c > 127 +} + +// escapeFTSLikePattern escapes special LIKE characters (%, _, \) in the search term +// so they are treated as literal characters rather than wildcards. 
+func escapeFTSLikePattern(term string) string { + // Count special characters to pre-allocate the exact buffer size needed + escapeCount := 0 + for i := range len(term) { + ch := term[i] + if ch == '\\' || ch == '%' || ch == '_' { + escapeCount++ + } + } + + // Allocate exact size: original length + number of escape characters + var result strings.Builder + result.Grow(len(term) + escapeCount) + for i := range len(term) { + ch := term[i] + if ch == '\\' || ch == '%' || ch == '_' { + result.WriteByte('\\') + } + result.WriteByte(ch) + } + return result.String() +} + +// ValidateFTSSearchStringForLikeFallback reports whether searchText falls +// inside the strict subset that the LIKE fallback is allowed to translate. +// The supported subset is, by mode: +// +// - Boolean mode: each whitespace-separated token must be `word`, `+word`, +// or `-word`, where `word` consists of ASCII alphanumeric characters or +// non-ASCII UTF-8 bytes (the same definition used by isFTSWordByte). +// - Natural-language mode: each whitespace-separated token must be a `word` +// of the same alphanumeric form (no leading +/- operators). +// +// An empty or whitespace-only search string is valid; BuildFTSToILikeExpression +// short-circuits to a constant-0 result for it. +// +// Anything outside this subset (phrases, * prefix, > < ~ relevance modifiers, +// () grouping, mid-word punctuation like `xx-yy`, etc.) is rejected because +// MySQL FTS tokenizes those constructs in ways that differ from a substring +// LIKE match. The planner uses this signal to skip the LIKE fallback for +// rejected strings; the native FTSMysqlMatchAgainst builtin can still serve +// the query when an FTS index is available. 
+func ValidateFTSSearchStringForLikeFallback(searchText string, modifier ast.FulltextSearchModifier) error { + isBoolean := modifier.IsBooleanMode() + for _, token := range strings.Fields(searchText) { + body := token + // strings.Fields never returns an empty token (consecutive whitespace + // is collapsed), so body[0] is safe today. Keep the len(body) > 0 + // guard explicit so the indexing is obviously bounded and the check + // stays correct if the tokenization ever changes. + if isBoolean && len(body) > 0 && (body[0] == '+' || body[0] == '-') { + body = body[1:] + } + if body == "" { + return ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST search term '" + token + "' is not supported in the LIKE fallback") + } + for i := range len(body) { + if !isFTSWordByte(body[i]) { + return ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST search term '" + token + "' is not supported in the LIKE fallback") + } + } + } + return nil +} + +// BuildFTSToILikeExpression converts a MATCH...AGAINST input (a list of column +// expressions, the search-string literal, and the parsed modifier) into an +// equivalent ILIKE-based predicate expression. +// +// Two callers share this conversion: +// - the planner's MATCH...AGAINST LIKE fallback rewrite, used by the +// "fts-like-fallback" alternative round when round 1 reports that the +// native FTSMysqlMatchAgainst builtin cannot serve a predicate-context +// MATCH (no FTS index on a TiFlash replica, modifier not pushdown-supported); +// - selectivity estimation, which substitutes the same ILIKE form for the +// opaque FTSMysqlMatchAgainst builtin so round 1's cost is computed from +// column statistics rather than a flat default — the native builtin +// cannot be evaluated in TiDB and would otherwise fall through to a +// SelectivityFactor (0.8) that ignores the column's histogram. +// +// Returns an integer (0/1) typed expression suitable for direct use as a +// filter predicate. 
+// +// Semantic differences from MySQL's full-text search are documented in detail +// at the planner-level call site; this helper preserves those approximations +// so both callers see the same translated expression. +func BuildFTSToILikeExpression( + ctx BuildContext, + columns []Expression, + searchText string, + modifier ast.FulltextSearchModifier, +) (Expression, error) { + if len(columns) == 0 { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with no columns") + } + + // WITH QUERY EXPANSION requires a second FTS pass to find semantically related + // terms; LIKE cannot approximate this. Error explicitly rather than silently + // producing wrong results. + if modifier.WithQueryExpansion() { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback") + } + + // Reject search strings outside the strict supported subset before we + // translate. Callers that want a graceful fallback (e.g. the planner + // redirecting to the native builtin, or selectivity estimation falling + // through to a default estimate) should call this validator directly and + // react to its error. + if err := ValidateFTSSearchStringForLikeFallback(searchText, modifier); err != nil { + return nil, err + } + + if searchText == "" { + return ftsZeroIntConst(), nil + } + + if modifier.IsBooleanMode() { + return buildFTSBooleanModeILikeExpression(ctx, columns, searchText) + } + if modifier.IsNaturalLanguageMode() { + return buildFTSNaturalLanguageModeILikeExpression(ctx, columns, searchText) + } + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST modifier is not supported in the LIKE fallback") +} + +// ftsZeroIntConst returns the constant-0 tiny-int expression used whenever +// the LIKE fallback can prove no row will match (empty search string, all +// terms tokenized away, or boolean-mode "only excluded" queries). 
+func ftsZeroIntConst() Expression { + return &Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + } +} + +// buildFTSBooleanModeILikeExpression handles `IN BOOLEAN MODE`. Required +// terms become an AND of per-term column-DNFs, excluded terms become NOT over +// per-term column-DNFs, and optional terms anchor the result only when no +// required terms exist (since LIKE cannot rank). +func buildFTSBooleanModeILikeExpression(ctx BuildContext, columns []Expression, searchText string) (Expression, error) { + terms := parseFTSBooleanSearchString(searchText) + if len(terms) == 0 { + return ftsZeroIntConst(), nil + } + + var required, excluded, optional []ftsSearchTerm + for _, term := range terms { + if term.word == "" { + continue + } + if term.isRequired { + required = append(required, term) + } else if term.isExcluded { + excluded = append(excluded, term) + } else { + optional = append(optional, term) + } + } + + // MySQL Boolean mode: a query with only excluded terms ("-a -b") returns + // an empty result set. The LIKE fallback must match this: when there are + // no required and no optional terms, no row can possibly satisfy the + // search, so return a constant FALSE immediately. + if len(required) == 0 && len(optional) == 0 && len(excluded) > 0 { + return ftsZeroIntConst(), nil + } + + var allPredicates []Expression + + // For each required term: (col1 ILIKE %term% OR col2 ILIKE %term% ...) + for _, term := range required { + var termColumnPreds []Expression + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + termColumnPreds = append(termColumnPreds, pred) + } + if len(termColumnPreds) > 0 { + allPredicates = append(allPredicates, ComposeDNFCondition(ctx, termColumnPreds...)) + } + } + + // For each excluded term: NOT(col1 ILIKE %term% OR col2 ILIKE %term% ...) 
+ for _, term := range excluded { + var termColumnPreds []Expression + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + termColumnPreds = append(termColumnPreds, pred) + } + if len(termColumnPreds) > 0 { + notPred, err := NewFunction(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), + ComposeDNFCondition(ctx, termColumnPreds...)) + if err != nil { + return nil, err + } + allPredicates = append(allPredicates, notPred) + } + } + + // For optional terms: since LIKE cannot rank, treat optionals as a + // positive filter when no required terms exist. + // - required>0: ignore optionals (required terms already anchor the result) + // - required==0, excluded==0: at least one optional must match (pure optional query) + // - required==0, excluded>0: at least one optional must match AND excluded terms + // must be absent; AND the optional-DNF into allPredicates below + if len(optional) > 0 && len(required) == 0 { + var allOptionalPreds []Expression + for _, term := range optional { + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + allOptionalPreds = append(allOptionalPreds, pred) + } + } + if len(allOptionalPreds) > 0 { + optionalDNF := ComposeDNFCondition(ctx, allOptionalPreds...) + if len(excluded) == 0 { + return optionalDNF, nil + } + allPredicates = append(allPredicates, optionalDNF) + } + } + + if len(allPredicates) == 0 { + return ftsZeroIntConst(), nil + } + + return ComposeCNFCondition(ctx, allPredicates...), nil +} + +// buildFTSNaturalLanguageModeILikeExpression handles the default +// natural-language mode by splitting the search string into whitespace +// tokens and OR-ing per-column per-word ILIKE predicates together. 
+func buildFTSNaturalLanguageModeILikeExpression(ctx BuildContext, columns []Expression, searchText string) (Expression, error) { + words := strings.Fields(searchText) + if len(words) == 0 { + return ftsZeroIntConst(), nil + } + + var columnPredicates []Expression + for _, column := range columns { + var wordPredicates []Expression + for _, word := range words { + pred, err := buildFTSILikePredicate(ctx, column, word) + if err != nil { + return nil, err + } + wordPredicates = append(wordPredicates, pred) + } + if len(wordPredicates) > 0 { + columnPredicates = append(columnPredicates, ComposeDNFCondition(ctx, wordPredicates...)) + } + } + + if len(columnPredicates) == 0 { + return ftsZeroIntConst(), nil + } + + return ComposeDNFCondition(ctx, columnPredicates...), nil +} + +// BuildFTSToILikeExpressionFromBuiltin pulls the search string and modifier +// out of a MATCH...AGAINST scalar function (FTSMysqlMatchAgainst) and +// delegates to BuildFTSToILikeExpression. It is the entry point for +// selectivity estimation, where the FTS scalar function is opaque to the +// stats engine; substituting an equivalent ILIKE expression lets the engine +// reuse its TopN/histogram-based estimation paths instead of falling back +// to a flat default that ignores column statistics. +// +// Restricted to single-column MATCH: GetSelectivityByFilter only estimates +// expressions over a single column, so a multi-column substituted ILIKE would +// be declined by the stats engine and fall through to the same str-match +// default that the un-substituted FTS expression already receives. Returning +// an error for the multi-column case lets the selectivity caller's existing +// err-check fall through cleanly, without producing a substitute that would +// never improve the estimate. 
+func BuildFTSToILikeExpressionFromBuiltin(ctx BuildContext, fts *ScalarFunction) (Expression, error) { + if fts == nil || fts.FuncName.L != ast.FTSMysqlMatchAgainst { + return nil, errors.Errorf("expected %s, got %v", ast.FTSMysqlMatchAgainst, fts) + } + args := fts.GetArgs() + if len(args) < 2 { + return nil, errors.Errorf("%s expects at least 2 args, got %d", ast.FTSMysqlMatchAgainst, len(args)) + } + if len(args) > 2 { + return nil, ErrNotSupportedYet.GenWithStackByArgs("multi-column MATCH...AGAINST in selectivity substitution") + } + againstConst, ok := args[0].(*Constant) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") + } + if againstConst.Value.IsNull() { + // Match the planner-side matchAgainstToLike NULL fast-path: emit + // Constant(NULL) so the substitute preserves SQL three-valued logic + // even though selectivity estimation does not currently exploit the + // difference. Constant(0) here would, under any future cost path that + // composes NOT over the substitute, report "NOT 0 = TRUE → selectivity + // 1" — opposite of native MATCH(NULL) which returns NULL. + return &Constant{ + Value: types.Datum{}, + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + if againstConst.Value.Kind() != types.KindString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-string search constant") + } + sig, ok := fts.Function.(*builtinFtsMysqlMatchAgainstSig) + if !ok { + return nil, errors.Errorf("unexpected builtin signature for %s: %T", ast.FTSMysqlMatchAgainst, fts.Function) + } + return BuildFTSToILikeExpression(ctx, args[1:], againstConst.Value.GetString(), sig.modifier) +} + +// buildFTSILikePredicate builds a single ILIKE predicate for a column and search term, +// wrapped in IFNULL so that NULL columns are treated as not containing the term. 
+func buildFTSILikePredicate(ctx BuildContext, column Expression, term string) (Expression, error) { + escapedTerm := escapeFTSLikePattern(term) + + // NOTE: Prefix matching (word*) in MySQL full-text search matches words that START with + // the prefix, but the word can appear anywhere in the text. Using LIKE without REGEXP, + // we cannot perfectly enforce word-start boundaries. We use %term% which may produce + // false positives but avoids false negatives. + pattern := "%" + escapedTerm + "%" + + patternConst := &Constant{ + Value: types.NewStringDatum(pattern), + RetType: types.NewFieldType(mysql.TypeVarchar), + } + + // Backslash escape character (=92) for ILIKE. + escapeConst := &Constant{ + Value: types.NewIntDatum(92), + RetType: types.NewFieldType(mysql.TypeTiny), + } + + // MySQL full-text search is always case-insensitive regardless of column + // collation, so ILIKE matches that semantic rather than plain LIKE which + // would follow the column's collation. + likeFunc, err := NewFunction(ctx, ast.Ilike, types.NewFieldType(mysql.TypeTiny), column, patternConst, escapeConst) + if err != nil { + return nil, err + } + + // Wrap with IFNULL so a NULL column is treated as not containing the term + // (consistent with MySQL FTS semantics where NULL columns are ignored). + // Without this, NOT(NULL ILIKE %term%) = NOT(NULL) = NULL which incorrectly + // filters rows that have a NULL column and don't contain the excluded term. + zeroConst := &Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + } + return NewFunction(ctx, ast.Ifnull, types.NewFieldType(mysql.TypeTiny), likeFunc, zeroConst) +} diff --git a/pkg/expression/fts_to_like_test.go b/pkg/expression/fts_to_like_test.go new file mode 100644 index 0000000000000..4f0581698c0a7 --- /dev/null +++ b/pkg/expression/fts_to_like_test.go @@ -0,0 +1,340 @@ +// Copyright 2026 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package expression + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/mock" + "github.com/stretchr/testify/require" +) + +func TestValidateFTSSearchStringForLikeFallback(t *testing.T) { + naturalMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode) + booleanMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode) + + tests := []struct { + name string + text string + modifier ast.FulltextSearchModifier + wantErr bool + }{ + // Natural-language mode: plain alphanumeric words only. 
+ {name: "natural empty", text: "", modifier: naturalMode, wantErr: false}, + {name: "natural whitespace only", text: " \t\n ", modifier: naturalMode, wantErr: false}, + {name: "natural single word", text: "MySQL", modifier: naturalMode, wantErr: false}, + {name: "natural multi word", text: "MySQL tutorial PostgreSQL", modifier: naturalMode, wantErr: false}, + {name: "natural alphanumeric mix", text: "abc123 mysql8", modifier: naturalMode, wantErr: false}, + {name: "natural rejects mid-word dash", text: "x-x", modifier: naturalMode, wantErr: true}, + {name: "natural rejects punctuation suffix", text: "MySQL,", modifier: naturalMode, wantErr: true}, + {name: "natural rejects + operator", text: "+word", modifier: naturalMode, wantErr: true}, + {name: "natural rejects - operator", text: "-word", modifier: naturalMode, wantErr: true}, + {name: "natural rejects quote", text: `"phrase"`, modifier: naturalMode, wantErr: true}, + {name: "natural rejects wildcard", text: "word*", modifier: naturalMode, wantErr: true}, + {name: "natural rejects percent", text: "100%", modifier: naturalMode, wantErr: true}, + {name: "natural rejects underscore", text: "test_file", modifier: naturalMode, wantErr: true}, + + // Boolean mode: plain word, +word, -word with alphanumeric body only. 
+ {name: "boolean empty", text: "", modifier: booleanMode, wantErr: false}, + {name: "boolean plain word", text: "MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean required word", text: "+MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean excluded word", text: "-MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean mix", text: "+apple -cherry pie", modifier: booleanMode, wantErr: false}, + {name: "boolean rejects mid-word dash", text: "xx-yy", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects bare operator", text: "+", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects bare minus", text: "-", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects + after body", text: "x+y", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects wildcard", text: "word*", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects required wildcard", text: "+word*", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects relevance gt", text: ">word", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects relevance lt", text: " 127 case). + {name: "natural utf8 word", text: "你好", modifier: naturalMode, wantErr: false}, + {name: "boolean utf8 word", text: "+你好", modifier: booleanMode, wantErr: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateFTSSearchStringForLikeFallback(tt.text, tt.modifier) + if tt.wantErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} + +// TestParseFTSBooleanSearchString covers the strict-subset inputs the boolean +// parser is expected to handle in production. Inputs outside the subset +// (phrases, wildcards, relevance modifiers, mid-word punctuation, etc.) are +// rejected upstream by ValidateFTSSearchStringForLikeFallback and therefore +// never reach this parser. 
+func TestParseFTSBooleanSearchString(t *testing.T) { + tests := []struct { + input string + expected []ftsSearchTerm + }{ + { + input: "+apple +pie", + expected: []ftsSearchTerm{ + {word: "apple", isRequired: true}, + {word: "pie", isRequired: true}, + }, + }, + { + input: "+apple -cherry", + expected: []ftsSearchTerm{ + {word: "apple", isRequired: true}, + {word: "cherry", isExcluded: true}, + }, + }, + { + input: "word1 word2 word3", + expected: []ftsSearchTerm{ + {word: "word1"}, + {word: "word2"}, + {word: "word3"}, + }, + }, + { + input: "word1\t\nword2", + expected: []ftsSearchTerm{ + {word: "word1"}, + {word: "word2"}, + }, + }, + { + input: "", + expected: nil, + }, + { + input: " \t\n ", + expected: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := parseFTSBooleanSearchString(tt.input) + require.Equal(t, len(tt.expected), len(result), "Number of terms should match") + for i, expected := range tt.expected { + require.Equal(t, expected.word, result[i].word, "Word should match") + require.Equal(t, expected.isRequired, result[i].isRequired, "isRequired should match") + require.Equal(t, expected.isExcluded, result[i].isExcluded, "isExcluded should match") + } + }) + } +} + +func TestParseFTSSearchTerm(t *testing.T) { + tests := []struct { + input string + expected ftsSearchTerm + }{ + {input: "+word", expected: ftsSearchTerm{word: "word", isRequired: true}}, + {input: "-word", expected: ftsSearchTerm{word: "word", isExcluded: true}}, + {input: "word", expected: ftsSearchTerm{word: "word"}}, + {input: "", expected: ftsSearchTerm{}}, + // Bare operator with no body (caller passes the result through; the + // upstream validator rejects this case before the parser sees it). 
+ {input: "+", expected: ftsSearchTerm{word: "", isRequired: true}}, + {input: "-", expected: ftsSearchTerm{word: "", isExcluded: true}}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := parseFTSSearchTerm(tt.input) + require.Equal(t, tt.expected.word, result.word, "Word should match") + require.Equal(t, tt.expected.isRequired, result.isRequired, "isRequired should match") + require.Equal(t, tt.expected.isExcluded, result.isExcluded, "isExcluded should match") + }) + } +} + +func TestEscapeFTSLikePattern(t *testing.T) { + tests := []struct { + input string + expected string + }{ + { + input: "normal text", + expected: "normal text", + }, + { + input: "100%", + expected: "100\\%", + }, + { + input: "test_file", + expected: "test\\_file", + }, + { + input: "path\\to\\file", + expected: "path\\\\to\\\\file", + }, + { + input: "mix_%_all", + expected: "mix\\_\\%\\_all", + }, + { + input: "\\%_", + expected: "\\\\\\%\\_", + }, + { + input: "", + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := escapeFTSLikePattern(tt.input) + require.Equal(t, tt.expected, result, "Escaped pattern should match") + }) + } +} + +// newFTSMatchAgainstForTest builds a real FTSMysqlMatchAgainst ScalarFunction +// suitable for exercising BuildFTSToILikeExpressionFromBuiltin. It mirrors +// the planner's matchAgainstToBuiltin flow: build via NewFunction with a +// string Constant for AGAINST and one or more string Columns for MATCH, +// then attach the modifier via SetFTSMysqlMatchAgainstModifier. 
+func newFTSMatchAgainstForTest(t *testing.T, ctx BuildContext, search string, numCols int, modifier ast.FulltextSearchModifier) *ScalarFunction { + t.Helper() + stringTp := types.NewFieldType(mysql.TypeVarchar) + stringTp.SetCollate(mysql.DefaultCollationName) + args := make([]Expression, 0, 1+numCols) + args = append(args, &Constant{Value: types.NewStringDatum(search), RetType: stringTp}) + for i := range numCols { + args = append(args, &Column{Index: i, RetType: stringTp}) + } + fn, err := NewFunction(ctx, ast.FTSMysqlMatchAgainst, types.NewFieldType(mysql.TypeDouble), args...) + require.NoError(t, err) + sf, ok := fn.(*ScalarFunction) + require.True(t, ok) + require.NoError(t, SetFTSMysqlMatchAgainstModifier(sf, modifier)) + return sf +} + +func TestBuildFTSToILikeExpressionFromBuiltin(t *testing.T) { + ctx := mock.NewContext() + naturalMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode) + + t.Run("nil scalar function", func(t *testing.T) { + _, err := BuildFTSToILikeExpressionFromBuiltin(ctx, nil) + require.Error(t, err) + }) + + t.Run("wrong function name", func(t *testing.T) { + // Construct a non-FTS ScalarFunction by reusing one we know exists. + stringTp := types.NewFieldType(mysql.TypeVarchar) + col := &Column{Index: 0, RetType: stringTp} + other, err := NewFunction(ctx, ast.Length, types.NewFieldType(mysql.TypeLonglong), col) + require.NoError(t, err) + _, err = BuildFTSToILikeExpressionFromBuiltin(ctx, other.(*ScalarFunction)) + require.Error(t, err) + require.Contains(t, err.Error(), ast.FTSMysqlMatchAgainst) + }) + + t.Run("single-column natural-language succeeds", func(t *testing.T) { + sf := newFTSMatchAgainstForTest(t, ctx, "mysql", 1, naturalMode) + expr, err := BuildFTSToILikeExpressionFromBuiltin(ctx, sf) + require.NoError(t, err) + require.NotNil(t, expr) + // The result should be a scalar function (IFNULL(ILIKE,...)) — not the + // untranslated FTS opaque builtin. 
+ resultSF, ok := expr.(*ScalarFunction) + require.True(t, ok) + require.NotEqual(t, ast.FTSMysqlMatchAgainst, resultSF.FuncName.L) + }) + + t.Run("multi-column rejected for selectivity substitution", func(t *testing.T) { + // GetSelectivityByFilter declines expressions over more than one column, + // so a multi-column substituted ILIKE would never improve the estimate. + // BuildFTSToILikeExpressionFromBuiltin returns an error to keep that + // path explicit; the selectivity caller's err-check then falls through + // to the str-match default cleanly. + sf := newFTSMatchAgainstForTest(t, ctx, "mysql", 2, naturalMode) + _, err := BuildFTSToILikeExpressionFromBuiltin(ctx, sf) + require.Error(t, err) + require.Contains(t, err.Error(), "multi-column") + }) + + t.Run("NULL search constant returns Constant(NULL)", func(t *testing.T) { + // The builtin's getFunction allows NULL search constants explicitly + // (see builtin_fts.go); the substitution short-circuits to Constant(NULL) + // rather than Constant(0) so it composes correctly under SQL three-valued + // logic and matches the planner-side matchAgainstToLike NULL fast-path. + stringTp := types.NewFieldType(mysql.TypeVarchar) + nullArg := &Constant{Value: types.NewDatum(nil), RetType: stringTp} + col := &Column{Index: 0, RetType: stringTp} + fn, err := NewFunction(ctx, ast.FTSMysqlMatchAgainst, types.NewFieldType(mysql.TypeDouble), nullArg, col) + require.NoError(t, err) + sf := fn.(*ScalarFunction) + require.NoError(t, SetFTSMysqlMatchAgainstModifier(sf, naturalMode)) + + expr, err := BuildFTSToILikeExpressionFromBuiltin(ctx, sf) + require.NoError(t, err) + c, ok := expr.(*Constant) + require.True(t, ok) + require.True(t, c.Value.IsNull(), "expected Constant(NULL), got %v", c.Value) + }) + + t.Run("search string outside strict subset rejected", func(t *testing.T) { + // Search string with mid-word `-` fails ValidateFTSSearchStringForLikeFallback + // and propagates that rejection through BuildFTSToILikeExpression.
+ sf := newFTSMatchAgainstForTest(t, ctx, "xx-yy", 1, naturalMode) + _, err := BuildFTSToILikeExpressionFromBuiltin(ctx, sf) + require.Error(t, err) + }) +} + +func TestScalarExprSupportedByFlashRejectsNonDefaultFTSModifier(t *testing.T) { + // The tipb pushdown protocol does not serialize the FTS modifier; TiFlash + // reconstructs the signature with the default (natural-language) modifier. + // scalarExprSupportedByFlash must therefore mark non-default-modifier + // FTSMysqlMatchAgainst as NOT Flash-supported even though the function + // name is generally Flash-pushdown-eligible. This is defense in depth on + // top of the planner's modifier guard in matchAgainstToBuiltin. + ctx := mock.NewContext() + naturalMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode) + booleanMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode) + queryExpansion := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode | ast.FulltextSearchModifierWithQueryExpansion) + + cases := []struct { + name string + modifier ast.FulltextSearchModifier + want bool + }{ + {"natural-language mode is Flash-supported", naturalMode, true}, + {"boolean mode is not Flash-supported", booleanMode, false}, + {"with-query-expansion is not Flash-supported", queryExpansion, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + sf := newFTSMatchAgainstForTest(t, ctx, "mysql", 1, tc.modifier) + require.Equal(t, tc.want, scalarExprSupportedByFlash(ctx.GetEvalCtx(), sf)) + }) + } +} diff --git a/pkg/expression/function_traits_test.go b/pkg/expression/function_traits_test.go index 8864e60478829..50768d6db29d8 100644 --- a/pkg/expression/function_traits_test.go +++ b/pkg/expression/function_traits_test.go @@ -186,6 +186,7 @@ func TestIllegalFunctions4GeneratedColumns(t *testing.T) { "make_set", "makedate", "maketime", + "match_against", "md5", "microsecond", "mid", diff --git a/pkg/expression/infer_pushdown.go 
b/pkg/expression/infer_pushdown.go index 6c6c35a208075..2e510af3220d6 100644 --- a/pkg/expression/infer_pushdown.go +++ b/pkg/expression/infer_pushdown.go @@ -452,6 +452,18 @@ func scalarExprSupportedByFlash(ctx EvalContext, function *ScalarFunction) bool return true case ast.FTSMatchWord: return true + case ast.FTSMysqlMatchAgainst: + // The tipb pushdown protocol (see distsql_builtin.go) does not + // serialize the FTS modifier; TiFlash defaults to natural-language + // mode on the reconstructed signature. Pushing a Boolean-mode or + // WITH QUERY EXPANSION call down would therefore silently execute + // with the modifier dropped. Mark such calls as not Flash-supported + // here as a defense in depth — the planner's modifier guard in + // matchAgainstToBuiltin already rejects them at plan time, but + // keeping pushdown self-consistent guards against any future code + // path that builds an FTSMysqlMatchAgainst around the planner. + sig, ok := function.Function.(*builtinFtsMysqlMatchAgainstSig) + return ok && !sig.modifier.IsBooleanMode() && !sig.modifier.WithQueryExpansion() case ast.Grouping: // grouping function for grouping sets identification. 
return true } diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index 69ebb853c3e21..025418ae45686 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -207,7 +207,10 @@ func TestFTSSyntax(t *testing.T) { // tk.MustContainErrMsg("select * from t where (fts_match_word('hello', title)) > 0", "Currently 'FTS_MATCH_WORD()' must be used alone") // tk.MustContainErrMsg("select (fts_match_word('hello', title)) AS score from t where fts_match_word('hello', title)", "Currently 'FTS_MATCH_WORD()' cannot be used in SELECT fields") tk.MustContainErrMsg("select * from t where match() against ('hello')", `You have an error in your SQL syntax`) - tk.MustContainErrMsg("select * from t where match(title) against ('hello' in boolean mode)", `UnknownType: *ast.MatchAgainst`) + // Test MATCH...AGAINST with alternative plans - LIKE fallback competes on cost + tk.MustExec("set @@tidb_opt_enable_alternative_logical_plans=ON") + tk.MustQuery("select * from t where match(title) against ('hello' in boolean mode)") + tk.MustExec("set @@tidb_opt_enable_alternative_logical_plans=OFF") tk.MustContainErrMsg("select * from t where fts_match_word(title, body)", `match against a non-constant string`) tk.MustContainErrMsg("select * from t where fts_match_word(45.67, body)", `match against a non-constant string`) tk.MustContainErrMsg("select * from t where fts_match_word('hello', title, body)", `Incorrect parameter count in the call to native function`) diff --git a/pkg/parser/ast/functions.go b/pkg/parser/ast/functions.go index 7e0e453860593..949efbc4e7241 100644 --- a/pkg/parser/ast/functions.go +++ b/pkg/parser/ast/functions.go @@ -372,7 +372,8 @@ const ( VecAsText = "vec_as_text" // FTS functions (tidb extension) - FTSMatchWord = "fts_match_word" + FTSMatchWord = "fts_match_word" + FTSMysqlMatchAgainst = "match_against" // TiDB internal function. 
TiDBDecodeKey = "tidb_decode_key" diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go index 131fe07abc5e5..c70f388c201e0 100644 --- a/pkg/planner/cardinality/selectivity.go +++ b/pkg/planner/cardinality/selectivity.go @@ -247,6 +247,48 @@ func Selectivity( case ast.Like, ast.Ilike, ast.Regexp, ast.RegexpLike: notCoveredStrMatch[i] = x continue + case ast.FTSMysqlMatchAgainst: + // FTSMysqlMatchAgainst is opaque to the stats engine — its + // evalReal errors when called outside TiFlash, so TopN-based + // estimation can't run on it directly and the generic fallback + // would use SelectivityFactor (0.8) regardless of column stats. + // Substitute the equivalent ILIKE-based expression so the cost + // of round 1's native plan reflects the column's histogram / + // TopN rather than the flat default — this affects join order, + // index selection, etc., even though round 1's plan is the + // only candidate when every predicate MATCH is native-viable + // (the fts-like-fallback round only fires when round 1 is + // discarded). + // + // The substitution only fires for single-column MATCH(...); + // GetSelectivityByFilter declines multi-column expressions, so a + // multi-column substitute would just fall through to the same + // str-match default that the un-substituted FTS expression already + // receives. BuildFTSToILikeExpressionFromBuiltin returns an error + // for the multi-column case to keep that path explicit here. + if substitute, err := expression.BuildFTSToILikeExpressionFromBuiltin(ctx.GetExprCtx(), x); err == nil { + switch sub := substitute.(type) { + case *expression.ScalarFunction: + notCoveredStrMatch[i] = sub + continue + case *expression.Constant: + // AGAINST(NULL) produces Constant(NULL) (preserves SQL + // three-valued logic — matches the planner-side + // matchAgainstToLike NULL fast-path); empty-string + // search produces Constant(0). 
Route either to the + // constants bucket so the stats engine recognizes the + // substitute as constant-false (the IsNull / ToBool + // pass over that bucket further down zeroes selectivity for both + // shapes) instead of applying the str-match default + // (0.1). + notCoveredConstants[i] = sub + continue + } + } + // Fall through if substitution failed; the FTS expression will + // use the str-match default selectivity (0.1) instead of 0.8. + notCoveredStrMatch[i] = x + continue case ast.UnaryNot: inner := expression.GetExprInsideIsTruth(x.GetArgs()[0]) innerSF, ok := inner.(*expression.ScalarFunction) diff --git a/pkg/planner/core/BUILD.bazel b/pkg/planner/core/BUILD.bazel index 6befbeb6e212f..14d86cd0cf2e3 100644 --- a/pkg/planner/core/BUILD.bazel +++ b/pkg/planner/core/BUILD.bazel @@ -18,6 +18,7 @@ go_library( "expression_rewriter.go", "find_best_task.go", "flat_plan.go", + "fulltext_to_like.go", "hint_utils.go", "index_join_path.go", "indexmerge_path.go", @@ -217,6 +218,7 @@ go_test( "exhaust_physical_plans_test.go", "expression_test.go", "find_best_task_test.go", + "fulltext_to_like_test.go", "hint_test.go", "integration_test.go", "lateral_join_test.go", diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 368f250fd321a..39ccf9a5696fc 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -704,6 +704,67 @@ func (er *expressionRewriter) canTreatInSubqueryAsExistsForFilter(planCtx *exprR return true } +// inDirectMatchBooleanContext reports whether the MATCH...AGAINST currently +// being rewritten sits in a position where its boolean (0/1) result is +// directly consumed as a predicate — i.e. every ancestor up to the WHERE / +// HAVING / JOIN ON root is one of: parentheses, AND, OR, or NOT. +// +// Any other ancestor (comparison `= 0` / `> 0.5`, `IS NULL`, CASE, arithmetic, +// XOR, scalar function, etc.)
means MATCH is being used as a scalar relevance +// score, where the LIKE rewrite's 0/1 output would diverge from the native +// float score and silently produce wrong rows. In those positions the +// rewriter must fall through to the native FTSMysqlMatchAgainst builtin, +// which preserves the relevance-score semantics (and errors at execution if +// no FTS index is available — the same behavior the user would see with +// alternative logical plans disabled). +func (er *expressionRewriter) inDirectMatchBooleanContext() bool { + if er.planCtx == nil { + return false + } + switch er.planCtx.builder.curClause { + case whereClause, havingClause, onClause: + default: + return false + } + if len(er.astNodeStack) == 0 { + return false + } + for i := len(er.astNodeStack) - 2; i >= 0; i-- { + switch n := er.astNodeStack[i].(type) { + case *ast.ParenthesesExpr: + case *ast.BinaryOperationExpr: + if n.Op != opcode.LogicAnd && n.Op != opcode.LogicOr { + return false + } + case *ast.UnaryOperationExpr: + if n.Op != opcode.Not && n.Op != opcode.Not2 { + return false + } + default: + return false + } + } + return true +} + +// matchHasLikeFallbackRescue reports whether matchAgainstToBuiltin is being +// invoked in a position where the alt-rounds driver will discard the produced +// plan and rebuild via the fts-like-fallback round. It is used by the modifier +// guard in matchAgainstToBuiltin to allow native emission of a non-default +// modifier when round 1's plan is destined for discard anyway. The rescue +// conditions mirror the ones in matchAgainstToExpression that trigger +// MarkNonViableFTSMatch — alternative logical plans enabled AND a direct +// boolean predicate context. 
+func (er *expressionRewriter) matchHasLikeFallbackRescue() bool { + if er.planCtx == nil || er.planCtx.builder == nil || er.planCtx.builder.ctx == nil { + return false + } + if !er.planCtx.builder.ctx.GetSessionVars().EnableAlternativeLogicalPlans { + return false + } + return er.inDirectMatchBooleanContext() +} + func (er *expressionRewriter) buildSemiApplyFromEqualSubq(np base.LogicalPlan, planCtx *exprRewriterPlanCtx, l, r expression.Expression, not, markNoDecorrelate bool) { intest.AssertNotNil(planCtx) if er.asScalar || not { @@ -1823,6 +1884,8 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok } er.ctxStack[len(er.ctxStack)-1].SetCoercibility(expression.CoercibilityExplicit) er.ctxStack[len(er.ctxStack)-1].SetCharsetAndCollation(arg.GetType(er.sctx.GetEvalCtx()).GetCharset(), arg.GetType(er.sctx.GetEvalCtx()).GetCollate()) + case *ast.MatchAgainst: + er.matchAgainstToExpression(v) default: er.err = errors.Errorf("UnknownType: %T", v) return retNode, false @@ -2362,6 +2425,294 @@ func (er *expressionRewriter) patternLikeOrIlikeToExpression(v *ast.PatternLikeO er.ctxStackAppend(function, types.EmptyName) } +func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { + // Both the column expressions and Against expression have been visited + // and pushed onto the ctxStack. The stack layout is: + // [..., col1, col2, ..., colN, against] + numCols := len(v.ColumnNames) + stackLen := len(er.ctxStack) + if stackLen < numCols+1 { + er.err = errors.Errorf("Unexpected stack length for MatchAgainst: %d", stackLen) + return + } + + // Default behavior (Alt-disabled or Alt-enabled round 1) is to emit the + // native FTSMysqlMatchAgainst builtin. 
The alternative-rounds driver flips + // AlternativeLogicalPlanFTSLikeFallback to true and re-runs the build + // only when round 1 reported a direct-boolean-context MATCH that the + // native builtin cannot serve (no FTS index on a TiFlash replica / + // modifier not pushdown-supported). In that second pass the rewriter + // emits ILIKE for direct-boolean-context MATCH only — scoring contexts + // (SELECT field list / ORDER BY) and scalar predicate positions + // (IS NULL, comparisons, CASE, arithmetic) need the float relevance + // score, so they keep using the native builtin and will error at + // execution if no FTS index exists there. + // + // "Direct boolean context" requires that every ancestor up to the + // WHERE/HAVING/ON root is AND/OR/NOT/parens — see inDirectMatchBooleanContext. + // Limiting the LIKE rewrite to that subset preserves the 0/1-vs-float + // distinction: in scalar positions, `MATCH(...) IS NULL`, `MATCH(...) > 0.5`, + // etc. would silently produce wrong rows if the LIKE rewrite's integer + // result were substituted for the native float score. + // + // Round 1 also has to record viability before committing to native: if + // any boolean-context MATCH is non-viable, the resulting plan would + // fail at execution. The rewriter records that on the planBuilder so the + // round driver can invalidate the plan and trigger the fallback round. + // Round 1 additionally records that a direct-boolean-context MATCH was + // seen so the driver runs the LIKE round for cost competition even when + // round 1's native plan is executable. + useLikeFallback := false + if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { + sessVars := er.planCtx.builder.ctx.GetSessionVars() + if er.inDirectMatchBooleanContext() { + if sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback { + // fts-like-fallback round: boolean-context MATCH rewrites to ILIKE. 
+ useLikeFallback = true + } else if sessVars.EnableAlternativeLogicalPlans { + // Round 1 (native). Mark the build so the driver runs the LIKE + // round and cost-compares its plan against round 1's. If this + // MATCH cannot run natively, also mark the build as non-viable + // so the driver discards round 1's plan; the rewrite continues + // with the native builtin to keep round 1 internally consistent. + er.planCtx.builder.MarkPredicateMatch() + if !er.ftsNativeViable(v.Modifier, numCols, stackLen) { + er.planCtx.builder.MarkNonViableFTSMatch() + } + } + } + } + + if useLikeFallback { + er.matchAgainstToLike(v, numCols, stackLen) + } else { + er.matchAgainstToBuiltin(v, numCols, stackLen) + } +} + +// ftsNativeViable reports whether the MATCH(...) currently being rewritten +// can be served on TiFlash by the native FTSMysqlMatchAgainst builtin. It +// walks the resolved column FieldNames sitting on ctxNameStk (stack layout is +// [..., col1, ..., colN, against]) and requires for each column: +// - the originating table has an available TiFlash replica; +// - the column is covered by a public FULLTEXT index on that table. +// +// In addition, the modifier must be the default natural-language mode. Boolean +// mode and WITH QUERY EXPANSION are not encoded in the tipb pushdown today +// (only ScalarFuncSig_FTSMatchExpression is emitted regardless of modifier), +// so a native plan that wins on cost would execute on TiFlash with the modifier +// silently dropped. Until the modifier is carried in the pushdown protocol, we +// treat those modifiers as non-viable for native pushdown. 
+func (er *expressionRewriter) ftsNativeViable(modifier ast.FulltextSearchModifier, numCols, stackLen int) bool { + if numCols <= 0 { + return false + } + if !ftsModifierAllowsNativePushdown(modifier) { + return false + } + builder := er.planCtx.builder + sessVars := builder.ctx.GetSessionVars() + nameStart := stackLen - numCols - 1 + for i := range numCols { + name := er.ctxNameStk[nameStart+i] + if name == nil { + return false + } + tblName := name.OrigTblName + if tblName.L == "" { + tblName = name.TblName + } + if tblName.L == "" { + return false + } + dbName := name.DBName + if dbName.L == "" { + dbName = ast.NewCIStr(sessVars.CurrentDB) + } + tblInfo, err := builder.is.TableInfoByName(dbName, tblName) + if err != nil { + return false + } + if tblInfo.TiFlashReplica == nil || !tblInfo.TiFlashReplica.Available || tblInfo.TiFlashReplica.Count == 0 { + return false + } + colName := name.OrigColName + if colName.L == "" { + colName = name.ColName + } + if !tableHasPublicFTSIndexOnColumn(tblInfo, colName.L) { + return false + } + } + return true +} + +// ftsModifierAllowsNativePushdown reports whether an FTS modifier can be +// safely served by the native FTSMysqlMatchAgainst builtin pushed to TiFlash. +// Today the tipb pushdown encodes only ScalarFuncSig_FTSMatchExpression and +// drops the modifier, so any non-default modifier would be executed by TiFlash +// as natural-language mode, silently producing wrong results. Only the default +// (natural-language, no query expansion) modifier is currently safe. +func ftsModifierAllowsNativePushdown(modifier ast.FulltextSearchModifier) bool { + return !modifier.IsBooleanMode() && !modifier.WithQueryExpansion() +} + +// tableHasPublicFTSIndexOnColumn reports whether tblInfo has a public FULLTEXT +// index covering the given column. TiDB's FULLTEXT index is single-column, so +// each column in MATCH(...) needs its own FTS index for the native path to be +// viable. 
+func tableHasPublicFTSIndexOnColumn(tblInfo *model.TableInfo, columnNameL string) bool { + for _, idx := range tblInfo.Indices { + if idx.FullTextInfo == nil || !idx.IsPublic() { + continue + } + if idx.FindColumnByName(columnNameL) != nil { + return true + } + } + return false +} + +// matchAgainstToBuiltin converts MATCH...AGAINST to the FTSMysqlMatchAgainst +// builtin scalar function which can be pushed down to TiFlash for execution +// against a fulltext index. +func (er *expressionRewriter) matchAgainstToBuiltin(v *ast.MatchAgainst, numCols, stackLen int) { + // Reject non-default modifiers when native is the final plan. The tipb + // pushdown protocol (see expression/distsql_builtin.go for the explicit + // note) does not serialize the FTS modifier, so TiFlash would silently + // execute Boolean-mode / query-expansion searches as natural-language + // mode. Until the modifier rides through pushdown, refuse to emit + // native here unless the alt-rounds driver is expected to discard this + // emission and rebuild via the fts-like-fallback round (which handles + // Boolean mode correctly via ILIKE; query expansion still errors there + // with a specific message). + if !ftsModifierAllowsNativePushdown(v.Modifier) && !er.matchHasLikeFallbackRescue() { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)") + return + } + + against := er.ctxStack[stackLen-1] + cols := er.ctxStack[stackLen-numCols-1 : stackLen-1] + + args := make([]expression.Expression, 0, 1+numCols) + args = append(args, against) + args = append(args, cols...) + + er.ctxStackPop(numCols + 1) + fn, err := er.newFunction(ast.FTSMysqlMatchAgainst, &v.Type, args...) 
+ if err != nil { + er.err = err + return + } + sf, ok := fn.(*expression.ScalarFunction) + if !ok { + er.err = errors.Errorf("unexpected expression type for %s: %T", ast.FTSMysqlMatchAgainst, fn) + return + } + if err := expression.SetFTSMysqlMatchAgainstModifier(sf, v.Modifier); err != nil { + er.err = err + return + } + er.ctxStackAppend(fn, types.EmptyName) +} + +// matchAgainstToLike converts MATCH...AGAINST to LIKE predicates as a +// fallback when the native FTS pushdown path is not viable. +func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, stackLen int) { + againstExpr := er.ctxStack[stackLen-1] + + constExpr, ok := againstExpr.(*expression.Constant) + if !ok { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") + return + } + + // The LIKE fallback bakes the search value into the produced plan — either + // as ILIKE pattern constants (non-NULL case) or as a Constant(NULL) + // short-circuit. A cached plan would reuse the first execution's baked + // value for later executions, producing wrong results whenever the AGAINST + // argument is mutable: a `?` parameter marker, a user variable, or another + // deferred expression. In particular, a NULL first bind would bake a + // Constant(NULL) plan and reuse it for a later non-NULL bind. Mark the + // plan non-cacheable here, before the NULL fast-path and before Eval, so + // the skip applies uniformly across all branches below. + if expression.MaybeOverOptimized4PlanCache(er.sctx, constExpr) { + er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes a mutable search string into plan constants") + } + + // Reject non-string matched columns before any value-based branch so the + // column-type error always wins. 
In current architecture round 1's + // matchAgainstToBuiltin → getFunction (builtin_fts.go) already rejects + // non-string columns before round 2 (this function) can run, but keep + // the check here too as defense in depth: the LIKE fallback's own NULL + // fast-path and strict-subset validator below should never accept a + // non-string column, regardless of any future code path that might + // reach this function around round 1. + columns := make([]expression.Expression, numCols) + for i := range numCols { + col := er.ctxStack[stackLen-numCols-1+i] + if col.GetType(er.sctx.GetEvalCtx()).EvalType() != types.ETString { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") + return + } + columns[i] = col + } + + searchText, err := constExpr.Eval(er.sctx.GetEvalCtx(), chunk.Row{}) + if err != nil { + er.err = err + return + } + + if searchText.IsNull() { + // NULL search yields NULL in MySQL FTS semantics + // (builtin_fts.go evalReal returns isNull=true for NULL args), so we + // emit Constant(NULL) rather than Constant(0). This preserves + // three-valued logic under NOT — NOT NULL = NULL filters the row — + // and under IS NULL / IS NOT NULL. A literal Constant(0) would make + // NOT(MATCH...) admit every row when the search is NULL, diverging + // from native semantics. + er.ctxStackPop(numCols + 1) + er.ctxStackAppend(&expression.Constant{ + Value: types.Datum{}, + RetType: types.NewFieldType(mysql.TypeTiny), + }, types.EmptyName) + return + } + + if searchText.Kind() != types.KindString { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-string search expression") + return + } + + // The LIKE fallback only translates a strict subset of MySQL FTS search + // strings (alphanumeric words, optionally prefixed with + or - in boolean + // mode). 
Anything outside that subset would tokenize differently in MySQL + // FTS than a substring LIKE match, so refuse it here. If the same MATCH + // is independently native-viable (FTS index + supported modifier), + // delegate to the native builtin so TiFlash serves it correctly; otherwise + // surface the error to the user. + if err := expression.ValidateFTSSearchStringForLikeFallback(searchText.GetString(), v.Modifier); err != nil { + if er.ftsNativeViable(v.Modifier, numCols, stackLen) { + er.matchAgainstToBuiltin(v, numCols, stackLen) + return + } + er.err = err + return + } + + er.ctxStackPop(numCols + 1) + + result, err := er.convertMatchAgainstToLike(columns, searchText.GetString(), v.Modifier) + if err != nil { + er.err = err + return + } + + er.ctxStackAppend(result, types.EmptyName) +} + func (er *expressionRewriter) regexpToScalarFunc(v *ast.PatternRegexpExpr) { l := len(er.ctxStack) er.err = expression.CheckArgsNotMultiColumnRow(er.ctxStack[l-2:]...) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go new file mode 100644 index 0000000000000..72f0cb04f519e --- /dev/null +++ b/pkg/planner/core/fulltext_to_like.go @@ -0,0 +1,76 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package core + +import ( + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/parser/ast" +) + +// convertMatchAgainstToLike converts a MATCH...AGAINST expression to ILIKE +// predicates. It is a thin wrapper around expression.BuildFTSToILikeExpression; +// the conversion logic lives in pkg/expression so the same translation can be +// shared with cardinality-based selectivity estimation (which substitutes the +// equivalent ILIKE form for the opaque FTSMysqlMatchAgainst builtin). +// +// This is a fallback rewrite since TiDB does not natively support full-text +// search outside the TiFlash FTS path. The planner only invokes it in +// direct-boolean predicate positions — every ancestor up to the +// WHERE / HAVING / JOIN ON root must be AND / OR / NOT / parens +// (see inDirectMatchBooleanContext in expression_rewriter.go). Scoring +// contexts (SELECT field list, ORDER BY) and scalar predicate positions +// (IS NULL, comparisons, CASE, arithmetic) keep the native +// FTSMysqlMatchAgainst builtin so the result is a float relevance score +// rather than 0/1, even though the native path then requires TiFlash at +// execution time. The semantic differences below therefore apply to +// direct-boolean predicate use only: +// +// 1. No relevance scoring — the synthesized ILIKE predicate produces a 0/1 +// boolean filter result, which is the only thing a direct-boolean +// predicate position consumes. Relevance-score positions (ORDER BY, +// scalar SELECT, MATCH ... = 0, MATCH ... > 0.5, etc.) are intentionally +// NOT routed through this fallback; substituting 0/1 there would +// silently corrupt the sort or the comparison. +// 2. No stop word filtering — searches for all words regardless of length +// or commonness. +// 3. No word length limits — MySQL ignores words shorter than the minimum token size +// (innodb_ft_min_token_size, default 3, for InnoDB; ft_min_word_len, default 4, for MyISAM); the ILIKE rewrite does not. +// 4. No word boundaries — LIKE %term% matches substrings anywhere, not just +// complete words.
Example: "cat" matches "concatenate", "category", +// "application"; MySQL FTS only matches "cat" as a standalone word. +// Enforcing word boundaries would require REGEXP, which we avoid. +// 5. Performance — LIKE predicates cannot use full-text indexes (much +// slower on large datasets). +// +// Search-string subset accepted by the rewrite (enforced upstream by +// expression.ValidateFTSSearchStringForLikeFallback): +// +// - Natural-language mode: whitespace-separated alphanumeric words only. +// - Boolean mode: each token is `word`, `+word` (required), or `-word` +// (excluded), where `word` is alphanumeric (ASCII or non-ASCII UTF-8). +// +// Anything outside that subset — phrases, * prefix, > < ~ relevance +// modifiers, () grouping, mid-word punctuation like `xx-yy` — is rejected +// at plan time with ErrNotSupportedYet because MySQL FTS tokenizes those +// constructs in ways a substring LIKE cannot reproduce. WITH QUERY +// EXPANSION is likewise rejected (no LIKE approximation exists for the +// second-pass tokenization). +func (er *expressionRewriter) convertMatchAgainstToLike( + columns []expression.Expression, + searchText string, + modifier ast.FulltextSearchModifier, +) (expression.Expression, error) { + return expression.BuildFTSToILikeExpression(er.sctx, columns, searchText, modifier) +} diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go new file mode 100644 index 0000000000000..159eccf7e9cda --- /dev/null +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -0,0 +1,134 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/stretchr/testify/require" +) + +func TestFTSModifierAllowsNativePushdown(t *testing.T) { + tests := []struct { + name string + modifier ast.FulltextSearchModifier + expected bool + }{ + { + name: "natural language mode (default)", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode), + expected: true, + }, + { + name: "boolean mode", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode), + expected: false, + }, + { + name: "natural language mode with query expansion", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode | ast.FulltextSearchModifierWithQueryExpansion), + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.expected, ftsModifierAllowsNativePushdown(tt.modifier)) + }) + } +} + +func TestTableHasPublicFTSIndexOnColumn(t *testing.T) { + ftsIdx := func(name, column string, state model.SchemaState) *model.IndexInfo { + return &model.IndexInfo{ + Name: ast.NewCIStr(name), + State: state, + Tp: ast.IndexTypeInvalid, + Columns: []*model.IndexColumn{{Name: ast.NewCIStr(column)}}, + FullTextInfo: &model.FullTextIndexInfo{ParserType: model.FullTextParserTypeStandardV1}, + } + } + plainIdx := func(name, column string) *model.IndexInfo { + return &model.IndexInfo{ + Name: ast.NewCIStr(name), + State: model.StatePublic, + Tp: ast.IndexTypeBtree, + Columns: 
[]*model.IndexColumn{{Name: ast.NewCIStr(column)}}, + } + } + + tests := []struct { + name string + indices []*model.IndexInfo + column string + expected bool + }{ + { + name: "no indices", + indices: nil, + column: "title", + expected: false, + }, + { + name: "only non-FTS index on the column", + indices: []*model.IndexInfo{plainIdx("idx_title", "title")}, + column: "title", + expected: false, + }, + { + name: "public FTS index on the column", + indices: []*model.IndexInfo{ftsIdx("ft_title", "title", model.StatePublic)}, + column: "title", + expected: true, + }, + { + name: "non-public FTS index on the column", + indices: []*model.IndexInfo{ftsIdx("ft_title", "title", model.StateWriteReorganization)}, + column: "title", + expected: false, + }, + { + name: "FTS index on a different column", + indices: []*model.IndexInfo{ftsIdx("ft_body", "body", model.StatePublic)}, + column: "title", + expected: false, + }, + { + name: "FTS index covers the column among many indices", + indices: []*model.IndexInfo{ + plainIdx("idx_id", "id"), + ftsIdx("ft_body", "body", model.StatePublic), + ftsIdx("ft_title", "title", model.StatePublic), + }, + column: "title", + expected: true, + }, + { + name: "case-insensitive column match", + indices: []*model.IndexInfo{ftsIdx("ft_title", "Title", model.StatePublic)}, + column: "title", + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tblInfo := &model.TableInfo{Indices: tt.indices} + require.Equal(t, tt.expected, tableHasPublicFTSIndexOnColumn(tblInfo, tt.column)) + }) + } +} diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index 8b54d178b282f..ceab10b9bd362 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -327,6 +327,50 @@ type PlanBuilder struct { allowBuildCastArray bool // resolveCtx is set when calling Build, it's only effective in the current Build call. 
resolveCtx *resolve.Context + + // nonViableFTSMatch is set during build when the expression rewriter + // encounters a predicate-context MATCH...AGAINST whose native form + // (FTSMysqlMatchAgainst) cannot be executed — the matched columns lack a + // public FULLTEXT index on a TiFlash-backed table, or the modifier is not + // supported by pushdown. The flag is read by the alternative-rounds driver + // after the round to invalidate the round's plan and trigger the + // fts-like-fallback round (see optimize.go). + nonViableFTSMatch bool + + // predicateMatchSeen is set during build when the expression rewriter + // encounters a direct-boolean-context MATCH...AGAINST (one whose 0/1 boolean + // result is consumed directly as a predicate). The alternative-rounds driver + // uses this to enable the fts-like-fallback round even when round 1's + // native plan is executable, so the LIKE-based plan can compete on cost. + predicateMatchSeen bool +} + +// HasNonViableFTSMatch reports whether the most recent build round saw a +// predicate-context MATCH...AGAINST that could not be served by the native +// FTSMysqlMatchAgainst builtin. The caller (optimize.go) uses this to +// invalidate the round's plan and trigger the fts-like-fallback round. +func (b *PlanBuilder) HasNonViableFTSMatch() bool { + return b.nonViableFTSMatch +} + +// MarkNonViableFTSMatch records that a predicate-context MATCH...AGAINST in +// the current build cannot be served natively. See HasNonViableFTSMatch. +func (b *PlanBuilder) MarkNonViableFTSMatch() { + b.nonViableFTSMatch = true +} + +// HasPredicateMatch reports whether the most recent build round saw a +// direct-boolean-context MATCH...AGAINST. The caller (optimize.go) uses this +// to decide whether to run the fts-like-fallback round for cost competition, +// independent of whether round 1's native plan is executable. 
+func (b *PlanBuilder) HasPredicateMatch() bool { + return b.predicateMatchSeen +} + +// MarkPredicateMatch records that the current build encountered a +// direct-boolean-context MATCH...AGAINST. See HasPredicateMatch. +func (b *PlanBuilder) MarkPredicateMatch() { + b.predicateMatchSeen = true } type handleColHelper struct { diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index 3e5463b430030..0dc994d997aef 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -561,6 +561,26 @@ func buildAndOptimizeLogicalPlanRound( return nil, nil, false, err } + // Record predicate-context MATCH for cost competition. The fts-like-fallback + // alternative round reads this signal to decide whether to build a competing + // ILIKE-based plan alongside round 1's native plan, so the cheaper of the + // two wins via the normal alt-rounds cost comparison. + if builder.HasPredicateMatch() { + sctx.GetSessionVars().StmtCtx.AlternativeLogicalPlanHasPredicateContextMatch = true + } + + // If this round saw a predicate-context MATCH that cannot be served by the + // native FTSMysqlMatchAgainst builtin, the produced plan would fail at + // execution. Discard it and arm AlternativeLogicalPlanFTSLikeFallback so + // any intervening rounds (correlate, etc.) re-rewrite with ILIKE too. The + // fts-like-fallback round below also forces this flag during setup; this + // outer assignment covers the non-viable case where the flag must stay + // true across all subsequent rounds, not just inside the LIKE round. + if builder.HasNonViableFTSMatch() { + sctx.GetSessionVars().StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + return p, names, false, nil + } + if *bestPlan == nil || cost < *bestCost { *bestCost = cost *bestPlan = finalPlan @@ -608,6 +628,12 @@ type alternativeRound struct { // wrapper. Safe because optimize is single-threaded per session. 
var savedEnableCorrelateSubquery bool +// savedFTSLikeFallback holds the pre-round value of +// AlternativeLogicalPlanFTSLikeFallback so the fts-like-fallback round's +// setup/cleanup can restore it after running with the flag forced on. Assumed safe +// because optimize is single-threaded per session. NOTE(review): this variable is package-level and therefore shared by all sessions — confirm that concurrent optimize calls cannot interleave setup and cleanup. +var savedFTSLikeFallback bool + var alternativeRounds = [...]alternativeRound{ { name: "non-decorrelate", @@ -631,6 +657,34 @@ var alternativeRounds = [...]alternativeRound{ sv.EnableCorrelateSubquery = savedEnableCorrelateSubquery }, }, + { + // fts-like-fallback: rebuild the plan rewriting predicate-context + // MATCH...AGAINST to ILIKE so it can compete with round 1's native plan + // on cost (and serve as the only valid plan when native is non-viable). + // Round 1 always uses the native builtin (same as Alt-disabled). This + // round fires whenever round 1 saw a direct-boolean-context MATCH + // (StmtCtx.AlternativeLogicalPlanHasPredicateContextMatch) — both plans then compete via the strict-`<` + // cost comparison in buildAndOptimizeLogicalPlanRound — or whenever + // round 1 saw a MATCH whose native form cannot execute + // (StmtCtx.AlternativeLogicalPlanFTSLikeFallback, set by the round driver after discarding round 1). + // In the discard case, round 1's plan is unavailable and this round's + // plan wins by default. 
+ name: "fts-like-fallback", + enabled: func(sv *variable.SessionVars) bool { + if !sv.EnableAlternativeLogicalPlans { + return false + } + return sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback || + sv.StmtCtx.AlternativeLogicalPlanHasPredicateContextMatch + }, + setup: func(sv *variable.SessionVars) { + savedFTSLikeFallback = sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + }, + cleanup: func(sv *variable.SessionVars) { + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = savedFTSLikeFallback + }, + }, } func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW, is infoschema.InfoSchema) (base.Plan, types.NameSlice, float64, error) { @@ -676,6 +730,18 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW if needRestoreLogicalPlanCtx { initialLogicalPlanCtx = saveLogicalPlanBuildCtx(sessVars) sessVars.StmtCtx.ResetAlternativeLogicalPlanSignals() + // Round 1 always uses the native FTSMysqlMatchAgainst builtin — same as + // the Alt-disabled default. The build records two signals on the + // planBuilder when MATCH...AGAINST is seen: + // * HasPredicateMatch: any direct-boolean-context MATCH. The round + // driver propagates this into stmtctx to trigger the + // fts-like-fallback alternative round, which builds a competing + // ILIKE-based plan; the cheaper of the two wins. + // * HasNonViableFTSMatch: a predicate-context MATCH whose native form + // cannot execute (no FTS index / no TiFlash replica / unsupported + // modifier). The round driver discards round 1's plan and forces + // AlternativeLogicalPlanFTSLikeFallback true so all subsequent + // rounds (correlate, etc.) re-rewrite with ILIKE. 
} p, names, nonLogical, err := buildAndOptimizeLogicalPlanRound( @@ -714,6 +780,7 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW enabledRounds = append(enabledRounds, round) } } + var lastAltRoundErr error for _, round := range enabledRounds { restoreLogicalPlanBuildCtx(sessVars, initialLogicalPlanCtx) failpoint.Inject("failIfAlternativeLogicalPlanRoundTriggered", func(val failpoint.Value) { @@ -749,10 +816,15 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW }() if err != nil { // Alternative rounds are optional optimizations. If one fails, - // log and continue — the first round's plan is still valid. + // log and continue — the first round's plan is still valid in + // the general case. fts-like-fallback is the exception: the + // first round's plan may have been discarded as non-executable, + // so we remember the last alt-round error in case bestPlan + // remains nil after the loop. logutil.BgLogger().Warn("alternative logical plan round failed", zap.String("round", round.name), zap.Error(err)) + lastAltRoundErr = err continue } if nonLogical { @@ -760,6 +832,13 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW } } if bestPlan == nil { + if lastAltRoundErr != nil { + // No valid plan from any round. Surface the most recent alt-round + // error rather than the generic sentinel — typically this is the + // fts-like-fallback round reporting why MATCH...AGAINST cannot be + // rewritten (unsupported search string, etc.). 
+ return nil, nil, 0, lastAltRoundErr + } return nil, nil, 0, errors.New("failed to build logical plan") } if needRestoreLogicalPlanCtx { diff --git a/pkg/planner/util/null_misc_test.go b/pkg/planner/util/null_misc_test.go index 3461068806664..0e2c1eb41f741 100644 --- a/pkg/planner/util/null_misc_test.go +++ b/pkg/planner/util/null_misc_test.go @@ -38,7 +38,7 @@ func TestNullRejectBuiltinRegistrySnapshot(t *testing.T) { sum := sha256.Sum256([]byte(strings.Join(names, "\n"))) require.NotEmpty(t, names) - require.Equal(t, "a5ce0716b778fb8e0b488d3a11c402d8a8224191757a9e02ece80895d5d67e05", hex.EncodeToString(sum[:])) + require.Equal(t, "729f5252bcd91efe1a4bbf0c383a36c5a2e52ed2d90d7aab0a3e0b450322294c", hex.EncodeToString(sum[:])) for name := range nullRejectRejectNullTests { require.Contains(t, names, name) diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index 6e39e3f88ecb2..3b35aa76e9d47 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -486,6 +486,27 @@ type StatementContext struct { // build round encountered a non-correlated IN subquery eligible for the // correlate-to-Apply alternative. AlternativeLogicalPlanPreferCorrelate bool + // AlternativeLogicalPlanFTSLikeFallback is a mode flag controlling how the + // expression rewriter handles MATCH...AGAINST in predicate contexts. When + // false (the default, matching Alt-disabled behavior) the rewriter emits + // the native FTSMysqlMatchAgainst builtin. When true, the rewriter emits + // ILIKE-based predicates instead. + // + // Round 1 always runs with this flag false. The "fts-like-fallback" + // alternative round flips it to true (via its setup/cleanup) while it + // builds a competing ILIKE-based plan; the cost-cheapest plan wins via the + // normal alt-rounds cost comparison. 
If round 1's build records a + // predicate-context MATCH that cannot be served natively (no FTS index on a + // matched column / no TiFlash replica / modifier not pushdown-supported), + // optimize.go additionally invalidates round 1's plan and forces this flag + // true outside the round so any intervening rounds (correlate, etc.) also + // produce executable LIKE-based plans. + AlternativeLogicalPlanFTSLikeFallback bool + // AlternativeLogicalPlanHasPredicateContextMatch indicates that round 1 + // encountered a direct-boolean-context MATCH...AGAINST. The round driver + // uses this to enable the fts-like-fallback round for cost competition even + // when round 1's native plan is executable. + AlternativeLogicalPlanHasPredicateContextMatch bool // IsExplainAnalyzeDML is true if the statement is "explain analyze DML executors", before responding the explain // results to the client, the transaction should be committed first. See issue #37373 for more details. @@ -665,6 +686,8 @@ func (sc *StatementContext) ResetAlternativeLogicalPlanSignals() { sc.AlternativeLogicalPlanDecorrelatedApply = false sc.AlternativeLogicalPlanSameOrderIndexJoin = false sc.AlternativeLogicalPlanOrderAwareJoinReorder = false + sc.AlternativeLogicalPlanFTSLikeFallback = false + sc.AlternativeLogicalPlanHasPredicateContextMatch = false sc.AlternativeLogicalPlanPreferCorrelate = false } diff --git a/tests/integrationtest/r/executor/show.result b/tests/integrationtest/r/executor/show.result index 69793e37958fd..625d9a6215550 100644 --- a/tests/integrationtest/r/executor/show.result +++ b/tests/integrationtest/r/executor/show.result @@ -754,6 +754,7 @@ ltrim make_set makedate maketime +match_against md5 microsecond mid diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result new file mode 100644 index 0000000000000..30e7acaf517ea --- /dev/null +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -0,0 
+1,240 @@ +set tidb_cost_model_version=1; +set @@tidb_opt_enable_alternative_logical_plans=ON; +drop table if exists articles; +create table articles (id int primary key, title varchar(200), body text); +insert into articles values +(1, 'MySQL Tutorial', 'This tutorial provides a basic MySQL tutorial'), +(2, 'How To Use MySQL Well', 'After you went through a MySQL tutorial'), +(3, 'Optimizing MySQL', 'In this tutorial we will show how to optimize MySQL'), +(4, 'MySQL vs. PostgreSQL', 'This article compares MySQL and PostgreSQL'), +(5, 'MySQL Security', 'How to secure your MySQL database'); +select id, title from articles where match(title) against('MySQL tutorial'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title, body) against('MySQL tutorial'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('+MySQL +tutorial' in boolean mode); +id title +1 MySQL Tutorial +select id, title from articles where match(title) against('+MySQL -tutorial' in boolean mode); +id title +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. 
PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('Optim*' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'Optim*' is not supported in the LIKE fallback' +select id, title from articles where match(title, body) against('"MySQL tutorial"' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title, body) against('+MySQL +database -PostgreSQL' in boolean mode); +id title +5 MySQL Security +select id, title from articles where match(title) against('tutorial security' in boolean mode); +id title +1 MySQL Tutorial +5 MySQL Security +select id, title from articles where match(title) against(''); +id title +set @@tidb_opt_enable_alternative_logical_plans=OFF; +select id, title from articles where match(title) against('MySQL'); +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +set @@tidb_opt_enable_alternative_logical_plans=ON; +select id, title from articles where match(title) against('MySQL'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('PostgreSQL'); +id title +4 MySQL vs. 
PostgreSQL +drop table if exists special_chars; +create table special_chars (id int primary key, content varchar(200)); +insert into special_chars values +(1, 'Progress is at 100%'), +(2, 'Progress is at 50%'), +(3, 'File name is test_file.txt'), +(4, 'Path is C:\\Windows\\System32'), +(5, 'Normal text without special chars'); +select id, content from special_chars where match(content) against('100%'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '100%' is not supported in the LIKE fallback' +select id, content from special_chars where match(content) against('test_file'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'test_file' is not supported in the LIKE fallback' +select id, content from special_chars where match(content) against('C:\\Windows'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'C:\Windows' is not supported in the LIKE fallback' +select id, content from special_chars where match(content) against('+100% +Progress' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+100%' is not supported in the LIKE fallback' +drop table if exists special_chars; +select id, title from articles where match(title) against('-PostgreSQL -Security' in boolean mode); +id title +select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('+MySQL +tutorial +-Security' in boolean mode); +id title +1 MySQL Tutorial +select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+*' is not supported in the LIKE fallback' +select id, 
title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); +id title +1 MySQL Tutorial +3 Optimizing MySQL +select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"How' is not supported in the LIKE fallback' +select id, title from articles where match(title) against(' +'); +id title +select id, title from articles where match(title) against('MySQL tutorial PostgreSQL'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('+"MySQL Tutorial"' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+"MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '-"MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+"How' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('tutorial -Security' in boolean mode); +id title +1 MySQL Tutorial +select id, title from articles where match(title) against('tutorial PostgreSQL -Security' in boolean mode); +id title +1 MySQL Tutorial +4 MySQL vs. 
PostgreSQL +select id, title from articles where match(title) against('MySQL, PostgreSQL.'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'MySQL,' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('>MySQL <PostgreSQL' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '>MySQL' is not supported in the LIKE fallback' +select id, title from articles where match(title) against(NULL); +id title +select id, title from articles where not match(title) against(NULL); +id title +select id, title from articles where (match(title) against(NULL)); +id title +select id, title from articles where match(title) against('~Security ~PostgreSQL' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '~Security' is not supported in the LIKE fallback' +select id, title from articles where match(title) against('MySQL' with query expansion); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback' +select id, match(title) against('MySQL') as score from articles; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles order by match(title) against('MySQL') desc; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles group by id, title having match(title) against('PostgreSQL'); +id title +4 MySQL vs. 
PostgreSQL +select a.id, a.title from articles a inner join articles a2 on a.id = a2.id and match(a.title) against('Security'); +id title +5 MySQL Security +select id from articles where match(id) against('MySQL'); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id from articles where match(id) against('xx-yy'); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id from articles where match(id) against(NULL); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id, title from articles +where match(title) against('MySQL') and match(body) against('PostgreSQL'); +id title +4 MySQL vs. PostgreSQL +select id, title from articles where not match(title) against('MySQL'); +id title +select id, title from articles where (match(title) against('MySQL')); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where (match(title) against('MySQL')) is null; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (match(title) against('MySQL')) > 0.5; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (match(title) against('MySQL')) = 0; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (case when match(title) against('MySQL') then 1 else 0 end) = 1; +Error 1105 (HY000): cannot use 'MATCH ... 
AGAINST' outside of fulltext index +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_lit from 'select id, title from articles where match(title) against(''MySQL'')'; +execute st_fts_lit; +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +execute st_fts_lit; +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select @@last_plan_from_cache; +@@last_plan_from_cache +1 +deallocate prepare st_fts_lit; +set @@tidb_enable_prepared_plan_cache=DEFAULT; +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts from 'select id, title from articles where match(title) against(?)'; +set @q='MySQL'; +execute st_fts using @q; +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +execute st_fts using @q; +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select @@last_plan_from_cache; +@@last_plan_from_cache +0 +set @q='PostgreSQL'; +execute st_fts using @q; +id title +4 MySQL vs. PostgreSQL +deallocate prepare st_fts; +set @@tidb_enable_prepared_plan_cache=DEFAULT; +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_null from 'select id, title from articles where match(title) against(?)'; +set @q = NULL; +execute st_fts_null using @q; +id title +set @q = 'PostgreSQL'; +execute st_fts_null using @q; +id title +4 MySQL vs. 
PostgreSQL +select @@last_plan_from_cache; +@@last_plan_from_cache +0 +deallocate prepare st_fts_null; +set @@tidb_enable_prepared_plan_cache=DEFAULT; +select id, match(title) against('+MySQL' in boolean mode) as score from articles; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +select id, title from articles order by match(title) against('+MySQL' in boolean mode) desc; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +select id, title from articles where (match(title) against('+MySQL' in boolean mode)) is null; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +set @@tidb_opt_enable_alternative_logical_plans=OFF; +select id, title from articles where match(title) against('+MySQL' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +set @@tidb_opt_enable_alternative_logical_plans=ON; +set @@tidb_opt_enable_alternative_logical_plans=OFF; +drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test new file mode 100644 index 0000000000000..f1b64c52ce563 --- /dev/null +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -0,0 +1,321 @@ +# Test cases for MATCH...AGAINST to LIKE conversion via alternative logical plans + +# Setup +set tidb_cost_model_version=1; +set @@tidb_opt_enable_alternative_logical_plans=ON; +drop table if exists articles; +create table articles (id int primary key, title varchar(200), body text); +insert into articles 
values + (1, 'MySQL Tutorial', 'This tutorial provides a basic MySQL tutorial'), + (2, 'How To Use MySQL Well', 'After you went through a MySQL tutorial'), + (3, 'Optimizing MySQL', 'In this tutorial we will show how to optimize MySQL'), + (4, 'MySQL vs. PostgreSQL', 'This article compares MySQL and PostgreSQL'), + (5, 'MySQL Security', 'How to secure your MySQL database'); + +# Test 1: Natural Language Mode - Single Column +select id, title from articles where match(title) against('MySQL tutorial'); + +# Test 2: Natural Language Mode - Multiple Columns +select id, title from articles where match(title, body) against('MySQL tutorial'); + +# Test 3: Boolean Mode - Required Terms +select id, title from articles where match(title) against('+MySQL +tutorial' in boolean mode); + +# Test 4: Boolean Mode - Excluded Terms +select id, title from articles where match(title) against('+MySQL -tutorial' in boolean mode); + +# Test 5: Boolean Mode - Prefix Wildcard is rejected by the strict subset (LIKE +# cannot enforce word-start boundaries; MySQL FTS would only match words +# starting with the prefix). Falls back to native FTS path; without an FTS +# index, surfaces the rewrite error. +-- error 1235 +select id, title from articles where match(title) against('Optim*' in boolean mode); + +# Test 6: Boolean Mode - Exact Phrase is rejected by the strict subset (LIKE +# cannot enforce word boundaries inside a phrase). Falls back to native FTS +# path; without an FTS index, surfaces the rewrite error. 
+-- error 1235 +select id, title from articles where match(title, body) against('"MySQL tutorial"' in boolean mode); + +# Test 7: Boolean Mode - Complex Query +select id, title from articles where match(title, body) against('+MySQL +database -PostgreSQL' in boolean mode); + +# Test 8: Boolean Mode - Optional Terms +select id, title from articles where match(title) against('tutorial security' in boolean mode); + +# Test 9: Empty Search String +select id, title from articles where match(title) against(''); + +# Test 10: Test without alternative plans (native FTS path - errors without TiFlash) +set @@tidb_opt_enable_alternative_logical_plans=OFF; +-- error 1105 +select id, title from articles where match(title) against('MySQL'); + +# Test 11: Switch back to alternative plans mode +set @@tidb_opt_enable_alternative_logical_plans=ON; +select id, title from articles where match(title) against('MySQL'); + +# Test 12: Natural Language Mode with single word +select id, title from articles where match(title) against('PostgreSQL'); + +# Test 13: Special characters in search strings are rejected by the strict +# subset (MySQL FTS treats %, _, \, : etc. as word separators or operators, +# so a substring LIKE on them would produce results inconsistent with MySQL +# FTS tokenization). Each rejection falls back to the native FTS path; without +# an FTS index, surfaces the rewrite error. 
+drop table if exists special_chars; +create table special_chars (id int primary key, content varchar(200)); +insert into special_chars values + (1, 'Progress is at 100%'), + (2, 'Progress is at 50%'), + (3, 'File name is test_file.txt'), + (4, 'Path is C:\\Windows\\System32'), + (5, 'Normal text without special chars'); + +-- error 1235 +select id, content from special_chars where match(content) against('100%'); + +-- error 1235 +select id, content from special_chars where match(content) against('test_file'); + +-- error 1235 +select id, content from special_chars where match(content) against('C:\\Windows'); + +-- error 1235 +select id, content from special_chars where match(content) against('+100% +Progress' in boolean mode); + +drop table if exists special_chars; + +# Test 14: Boolean mode - only excluded terms (no required/optional) +select id, title from articles where match(title) against('-PostgreSQL -Security' in boolean mode); + +# Test 15: Boolean mode - an unterminated double quote is rejected by the strict subset. +-- error 1235 +select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); + +# Test 16: Boolean mode - mixed whitespace (tabs and newlines) +select id, title from articles where match(title) against('+MySQL +tutorial +-Security' in boolean mode); + +# Test 17: Boolean mode - `*` is rejected by the strict subset. +-- error 1235 +select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); + +# Test 18: Boolean mode - multiple excluded terms +select id, title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); + +# Test 19: Boolean mode - a query mixing `*` with a quoted phrase is rejected by the strict subset. 
+-- error 1235 +select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); + +# Test 20: Natural language mode - only whitespace +select id, title from articles where match(title) against(' + '); + +# Test 21: Natural language mode - multiple spaces between words +select id, title from articles where match(title) against('MySQL tutorial PostgreSQL'); + +# Test 22: Boolean mode - required phrase rejected by strict subset. +-- error 1235 +select id, title from articles where match(title) against('+"MySQL Tutorial"' in boolean mode); + +# Test 23: Boolean mode - excluded phrase rejected by strict subset. +-- error 1235 +select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); + +# Test 24: Boolean mode - phrase mixed with words rejected by strict subset. +-- error 1235 +select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); + +# Test 25: Boolean mode - optional + excluded (optional treated as required filter) +select id, title from articles where match(title) against('tutorial -Security' in boolean mode); + +# Test 26: Boolean mode - optional + excluded with multiple optionals +select id, title from articles where match(title) against('tutorial PostgreSQL -Security' in boolean mode); + +# Test 27: Natural language mode - punctuation in tokens rejected by strict +# subset. MySQL FTS would tokenize away the punctuation, but a substring LIKE +# would include it, so we refuse the rewrite. +-- error 1235 +select id, title from articles where match(title) against('MySQL, PostgreSQL.'); + +# Test 28: Boolean mode - relevance modifiers > < rejected by strict subset. +-- error 1235 +select id, title from articles where match(title) against('>MySQL 0.5; + +# Test 36g: Scalar-position MATCH — explicit comparison to 0. Native returns +# the score (≥0). 
Coincidentally a LIKE 0/1 result agrees on "no match" +# rows, but we still route to native to preserve relevance-score semantics +# uniformly across scalar positions. +-- error 1105 +select id, title from articles where (match(title) against('MySQL')) = 0; + +# Test 36h: Scalar-position MATCH inside CASE WHEN. The WHEN expression takes +# a boolean condition, but the MATCH is buried under the CASE node, which is +# a non-boolean ancestor, so the query falls through to the native path. +-- error 1105 +select id, title from articles where (case when match(title) against('MySQL') then 1 else 0 end) = 1; + +# Test 37: Plan cache - prepared statement with literal AGAINST IS cacheable. +# The LIKE rewrite bakes the search string into pattern constants; for a true +# literal those constants are stable across executions, so the plan must be +# cacheable. Verifies the LIKE fallback only skips plan cache when the AGAINST +# constant is mutable (param marker / deferred expr), not for plain literals. +# After the second execute, @@last_plan_from_cache is expected to be 1. +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_lit from 'select id, title from articles where match(title) against(''MySQL'')'; +execute st_fts_lit; +execute st_fts_lit; +select @@last_plan_from_cache; +deallocate prepare st_fts_lit; +set @@tidb_enable_prepared_plan_cache=DEFAULT; + +# Test 38: Plan cache - prepared statement with ? in AGAINST must NOT cache. +# A param marker is mutable across executions; baking the first execution's +# pattern would silently produce wrong results when the bind value changes. +# After the second execute, @@last_plan_from_cache is therefore expected to be 0. +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts from 'select id, title from articles where match(title) against(?)'; +set @q='MySQL'; +execute st_fts using @q; +execute st_fts using @q; +select @@last_plan_from_cache; +# Bind a different value to confirm results stay correct under the non-cached plan. 
+set @q='PostgreSQL'; +execute st_fts using @q; +deallocate prepare st_fts; +set @@tidb_enable_prepared_plan_cache=DEFAULT; + +# Test 38a: Prepared statement with a NULL first bind followed by a non-NULL +# bind. Before the fix, the LIKE fallback's NULL fast-path emitted Constant(0) +# and ran BEFORE the plan-cache skip check, so the prepared plan could cache a +# constant-false plan that a later non-NULL bind would silently reuse, +# returning zero rows instead of the matching ones. The fix moves the +# plan-cache skip ahead of the NULL fast-path (so mutable AGAINST always +# disables caching) and changes the NULL emission to Constant(NULL) (so +# NULL three-valued logic is preserved). After the fix, the second execute +# must return the PostgreSQL row, and @@last_plan_from_cache must be 0. +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_null from 'select id, title from articles where match(title) against(?)'; +set @q = NULL; +execute st_fts_null using @q; +set @q = 'PostgreSQL'; +execute st_fts_null using @q; +select @@last_plan_from_cache; +deallocate prepare st_fts_null; +set @@tidb_enable_prepared_plan_cache=DEFAULT; + +# Note: user variables in AGAINST (e.g., AGAINST(@search)) are rejected at +# rewrite time as a non-constant search string, so they never reach the +# plan-cache decision and need no separate cache-skip coverage here. + +# Test 39: Non-default modifier in a scoring context (SELECT field). LIKE +# cannot produce a float relevance score, so it cannot rescue this query; the +# modifier guard in matchAgainstToBuiltin must error at plan time rather than emit a +# native FTS expression that TiFlash would silently execute as natural-language +# mode (the tipb pushdown protocol drops the modifier). +-- error 1235 +select id, match(title) against('+MySQL' in boolean mode) as score from articles; + +# Test 40: Non-default modifier in ORDER BY (scoring context). The same modifier guard applies. 
+-- error 1235 +select id, title from articles order by match(title) against('+MySQL' in boolean mode) desc; + +# Test 41: Non-default modifier in a scalar predicate position (IS NULL). +# Even with alternative logical plans enabled, the LIKE round only rewrites +# direct-boolean MATCHes; the scalar position falls through to native, which +# must reject the modifier rather than mistranslate it on TiFlash. +-- error 1235 +select id, title from articles where (match(title) against('+MySQL' in boolean mode)) is null; + +# Test 42: Boolean mode in WHERE with alternative logical plans disabled. +# Without the fts-like-fallback rescue, native is the final plan, so the +# modifier guard must fire. (Before this change, this query would be pushed to +# TiFlash and silently executed as natural-language mode.) +set @@tidb_opt_enable_alternative_logical_plans=OFF; +-- error 1235 +select id, title from articles where match(title) against('+MySQL' in boolean mode); +set @@tidb_opt_enable_alternative_logical_plans=ON; + +# Cleanup: turn alternative logical plans back off and drop the test table. +set @@tidb_opt_enable_alternative_logical_plans=OFF; +drop table if exists articles;