From a5a640084a9426d7ee80a2d3c91d72f1115d5169 Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 17 Jan 2026 18:07:51 -0800 Subject: [PATCH 01/42] planner: rewrite FTS predicates to LIKE if no FTS index --- pkg/planner/core/BUILD.bazel | 2 + pkg/planner/core/expression_rewriter.go | 70 +++++ pkg/planner/core/fulltext_to_like.go | 295 ++++++++++++++++++ pkg/planner/core/fulltext_to_like_test.go | 131 ++++++++ pkg/sessionctx/vardef/tidb_vars.go | 5 + pkg/sessionctx/variable/session.go | 4 + pkg/sessionctx/variable/sysvar.go | 4 + .../t/planner/core/fulltext_search.test | 55 ++++ 8 files changed, 566 insertions(+) create mode 100644 pkg/planner/core/fulltext_to_like.go create mode 100644 pkg/planner/core/fulltext_to_like_test.go create mode 100644 tests/integrationtest/t/planner/core/fulltext_search.test diff --git a/pkg/planner/core/BUILD.bazel b/pkg/planner/core/BUILD.bazel index 49100398ecb55..c71ac428ee9ce 100644 --- a/pkg/planner/core/BUILD.bazel +++ b/pkg/planner/core/BUILD.bazel @@ -13,6 +13,7 @@ go_library( "expression_rewriter.go", "find_best_task.go", "flat_plan.go", + "fulltext_to_like.go", "hint_utils.go", "index_join_path.go", "indexmerge_path.go", @@ -209,6 +210,7 @@ go_test( "exhaust_physical_plans_test.go", "expression_test.go", "find_best_task_test.go", + "fulltext_to_like_test.go", "hint_test.go", "indexmerge_intersection_test.go", "indexmerge_path_test.go", diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index a0f8f5208077b..3d3b61d263bdd 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1692,6 +1692,8 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok } er.ctxStack[len(er.ctxStack)-1].SetCoercibility(expression.CoercibilityExplicit) er.ctxStack[len(er.ctxStack)-1].SetCharsetAndCollation(arg.GetType(er.sctx.GetEvalCtx()).GetCharset(), arg.GetType(er.sctx.GetEvalCtx()).GetCollate()) + case *ast.MatchAgainst: + 
er.matchAgainstToExpression(v) default: er.err = errors.Errorf("UnknownType: %T", v) return retNode, false @@ -2217,6 +2219,74 @@ func (er *expressionRewriter) patternLikeOrIlikeToExpression(v *ast.PatternLikeO er.ctxStackAppend(function, types.EmptyName) } +func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { + // Check the session variable to determine behavior + var fallbackMode string + if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { + fallbackMode = er.planCtx.builder.ctx.GetSessionVars().FulltextSearchFallback + } else { + fallbackMode = "like" // default + } + + if fallbackMode == "error" { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST without fulltext index") + return + } + + // The Against expression has been visited and should be on the ctxStack + // Pop it from the stack + l := len(er.ctxStack) + if l < 1 { + er.err = errors.Errorf("MATCH...AGAINST: expected Against expression on stack") + return + } + + againstExpr := er.ctxStack[l-1] + er.ctxStackPop(1) + + // Check if it's a constant string + constExpr, ok := againstExpr.(*expression.Constant) + if !ok { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") + return + } + + searchText, err := constExpr.Eval(er.sctx.GetEvalCtx(), chunk.Row{}) + if err != nil { + er.err = err + return + } + + if searchText.Kind() != types.KindString { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-string search expression") + return + } + + // Resolve column expressions + var columns []expression.Expression + for _, colName := range v.ColumnNames { + idx, err := expression.FindFieldName(er.names, colName) + if err != nil { + er.err = err + return + } + if idx < 0 { + er.err = errors.Errorf("Unknown column '%s' in MATCH...AGAINST", colName.Name.O) + return + } + columns = append(columns, er.schema.Columns[idx]) + } + + // Convert to 
LIKE predicates + result, err := er.convertMatchAgainstToLike(columns, searchText.GetString(), v.Modifier) + if err != nil { + er.err = err + return + } + + er.ctxStackAppend(result, types.EmptyName) +} + func (er *expressionRewriter) regexpToScalarFunc(v *ast.PatternRegexpExpr) { l := len(er.ctxStack) er.err = expression.CheckArgsNotMultiColumnRow(er.ctxStack[l-2:]...) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go new file mode 100644 index 0000000000000..7654505c87ba2 --- /dev/null +++ b/pkg/planner/core/fulltext_to_like.go @@ -0,0 +1,295 @@ +// Copyright 2025 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package core + +import ( + "strings" + + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" +) + +// searchTerm represents a single term in a Boolean fulltext search query +type searchTerm struct { + word string + isRequired bool // Has '+' prefix + isExcluded bool // Has '-' prefix + isPrefixMatch bool // Has '*' suffix + isPhrase bool // Wrapped in quotes +} + +// parseBooleanSearchString parses a Boolean mode search string into individual terms +func parseBooleanSearchString(text string) []searchTerm { + var terms []searchTerm + var current strings.Builder + inQuote := false + i := 0 + + for i < len(text) { + ch := text[i] + + switch ch { + case '"': + if inQuote { + // End of phrase + phrase := current.String() + if phrase != "" { + terms = append(terms, searchTerm{ + word: phrase, + isPhrase: true, + }) + } + current.Reset() + inQuote = false + } else { + // Start of phrase + inQuote = true + } + i++ + case ' ', '\t', '\n', '\r': + if inQuote { + current.WriteByte(ch) + } else if current.Len() > 0 { + // End of word + word := current.String() + terms = append(terms, parseSearchTerm(word)) + current.Reset() + } + i++ + default: + current.WriteByte(ch) + i++ + } + } + + // Handle remaining content + if current.Len() > 0 { + if inQuote { + // Unclosed quote, treat as phrase + terms = append(terms, searchTerm{ + word: current.String(), + isPhrase: true, + }) + } else { + word := current.String() + terms = append(terms, parseSearchTerm(word)) + } + } + + return terms +} + +// parseSearchTerm parses a single search term (not in quotes) and extracts operators +func parseSearchTerm(word string) searchTerm { + if word == "" { + return searchTerm{} + } + + term := searchTerm{word: word} + + // Check for leading operators + if word[0] == '+' { + term.isRequired = true + word = word[1:] + } else if word[0] == '-' { + term.isExcluded = true + word = 
word[1:] + } + + // Check for trailing wildcard + if len(word) > 0 && word[len(word)-1] == '*' { + term.isPrefixMatch = true + word = word[:len(word)-1] + } + + term.word = word + return term +} + +// convertMatchAgainstToLike converts a MATCH...AGAINST expression to LIKE predicates +func (er *expressionRewriter) convertMatchAgainstToLike( + columns []expression.Expression, + searchText string, + modifier ast.FulltextSearchModifier, +) (expression.Expression, error) { + if len(columns) == 0 { + return nil, expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with no columns") + } + + if searchText == "" { + // Empty search string matches nothing + return &expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + + var columnPredicates []expression.Expression + + if modifier.IsBooleanMode() { + // Parse Boolean mode search string + terms := parseBooleanSearchString(searchText) + if len(terms) == 0 { + return &expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + + // Group terms by type + var required, excluded, optional []searchTerm + for _, term := range terms { + if term.word == "" { + continue + } + if term.isRequired { + required = append(required, term) + } else if term.isExcluded { + excluded = append(excluded, term) + } else { + optional = append(optional, term) + } + } + + // Build predicates for each column + for _, column := range columns { + var predicates []expression.Expression + + // AND all required terms + for _, term := range required { + pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch, term.isPhrase) + if err != nil { + return nil, err + } + predicates = append(predicates, pred) + } + + // AND NOT all excluded terms + for _, term := range excluded { + pred, err := er.buildLikePredicate(column, term.word, true, term.isPrefixMatch, term.isPhrase) + if err != nil { + return nil, err + } + 
predicates = append(predicates, pred) + } + + // OR all optional terms (if any) + if len(optional) > 0 { + var optionalPreds []expression.Expression + for _, term := range optional { + pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch, term.isPhrase) + if err != nil { + return nil, err + } + optionalPreds = append(optionalPreds, pred) + } + if len(optionalPreds) > 0 { + predicates = append(predicates, expression.ComposeDNFCondition(er.sctx, optionalPreds...)) + } + } + + // If we have any predicates for this column, combine them with AND + if len(predicates) > 0 { + columnPredicates = append(columnPredicates, expression.ComposeCNFCondition(er.sctx, predicates...)) + } + } + } else { + // Natural Language Mode: split into words and OR them together + words := strings.Fields(searchText) + if len(words) == 0 { + return &expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + + for _, column := range columns { + var wordPredicates []expression.Expression + for _, word := range words { + pred, err := er.buildLikePredicate(column, word, false, false, false) + if err != nil { + return nil, err + } + wordPredicates = append(wordPredicates, pred) + } + if len(wordPredicates) > 0 { + columnPredicates = append(columnPredicates, expression.ComposeDNFCondition(er.sctx, wordPredicates...)) + } + } + } + + // OR across all columns + if len(columnPredicates) == 0 { + return &expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + + return expression.ComposeDNFCondition(er.sctx, columnPredicates...), nil +} + +// buildLikePredicate builds a single LIKE predicate for a column and search term +func (er *expressionRewriter) buildLikePredicate( + column expression.Expression, + term string, + isNegated bool, + isPrefixMatch bool, + isPhrase bool, +) (expression.Expression, error) { + // Build the pattern + var pattern string + if isPhrase { + 
// Exact phrase: %term% + pattern = "%" + term + "%" + } else if isPrefixMatch { + // Prefix match: term% + pattern = term + "%" + } else { + // General match: %term% + pattern = "%" + term + "%" + } + + // Create constant for pattern + patternConst := &expression.Constant{ + Value: types.NewStringDatum(pattern), + RetType: types.NewFieldType(mysql.TypeVarchar), + } + + // Create escape constant (backslash = 92) + escapeConst := &expression.Constant{ + Value: types.NewIntDatum(92), + RetType: types.NewFieldType(mysql.TypeTiny), + } + + // Build LIKE function + likeFunc, err := er.newFunction(ast.Like, types.NewFieldType(mysql.TypeTiny), column, patternConst, escapeConst) + if err != nil { + return nil, err + } + + // Apply NOT if needed + if isNegated { + notFunc, err := er.newFunction(ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), likeFunc) + if err != nil { + return nil, err + } + return notFunc, nil + } + + return likeFunc, nil +} diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go new file mode 100644 index 0000000000000..38d8675fb1527 --- /dev/null +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -0,0 +1,131 @@ +// Copyright 2025 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package core + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseBooleanSearchString(t *testing.T) { + tests := []struct { + input string + expected []searchTerm + }{ + { + input: "+apple +pie", + expected: []searchTerm{ + {word: "apple", isRequired: true}, + {word: "pie", isRequired: true}, + }, + }, + { + input: "+apple -cherry", + expected: []searchTerm{ + {word: "apple", isRequired: true}, + {word: "cherry", isExcluded: true}, + }, + }, + { + input: "apple*", + expected: []searchTerm{ + {word: "apple", isPrefixMatch: true}, + }, + }, + { + input: `"exact phrase"`, + expected: []searchTerm{ + {word: "exact phrase", isPhrase: true}, + }, + }, + { + input: `+database +mysql -oracle "full text"`, + expected: []searchTerm{ + {word: "database", isRequired: true}, + {word: "mysql", isRequired: true}, + {word: "oracle", isExcluded: true}, + {word: "full text", isPhrase: true}, + }, + }, + { + input: "word1 word2 word3", + expected: []searchTerm{ + {word: "word1"}, + {word: "word2"}, + {word: "word3"}, + }, + }, + { + input: "+word1* -word2", + expected: []searchTerm{ + {word: "word1", isRequired: true, isPrefixMatch: true}, + {word: "word2", isExcluded: true}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := parseBooleanSearchString(tt.input) + require.Equal(t, len(tt.expected), len(result), "Number of terms should match") + for i, expected := range tt.expected { + require.Equal(t, expected.word, result[i].word, "Word should match") + require.Equal(t, expected.isRequired, result[i].isRequired, "isRequired should match") + require.Equal(t, expected.isExcluded, result[i].isExcluded, "isExcluded should match") + require.Equal(t, expected.isPrefixMatch, result[i].isPrefixMatch, "isPrefixMatch should match") + require.Equal(t, expected.isPhrase, result[i].isPhrase, "isPhrase should match") + } + }) + } +} + +func TestParseSearchTerm(t *testing.T) { + tests := []struct { + input string + 
expected searchTerm + }{ + { + input: "+word", + expected: searchTerm{word: "word", isRequired: true}, + }, + { + input: "-word", + expected: searchTerm{word: "word", isExcluded: true}, + }, + { + input: "word*", + expected: searchTerm{word: "word", isPrefixMatch: true}, + }, + { + input: "+word*", + expected: searchTerm{word: "word", isRequired: true, isPrefixMatch: true}, + }, + { + input: "word", + expected: searchTerm{word: "word"}, + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := parseSearchTerm(tt.input) + require.Equal(t, tt.expected.word, result.word, "Word should match") + require.Equal(t, tt.expected.isRequired, result.isRequired, "isRequired should match") + require.Equal(t, tt.expected.isExcluded, result.isExcluded, "isExcluded should match") + require.Equal(t, tt.expected.isPrefixMatch, result.isPrefixMatch, "isPrefixMatch should match") + }) + } +} diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index c1ba42feab382..31abb2ab36143 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -341,6 +341,10 @@ const ( // TiDBOptEnableCorrelationAdjustment is used to indicates if enable correlation adjustment. TiDBOptEnableCorrelationAdjustment = "tidb_opt_enable_correlation_adjustment" + // TiDBOptFulltextSearchFallback controls the behavior when MATCH...AGAINST syntax is used. + // Options: 'like' (convert to LIKE predicates, default), 'error' (throw error if no fulltext index). + TiDBOptFulltextSearchFallback = "tidb_opt_fulltext_search_fallback" + // TiDBOptLimitPushDownThreshold determines if push Limit or TopN down to TiKV forcibly. 
TiDBOptLimitPushDownThreshold = "tidb_opt_limit_push_down_threshold" @@ -1405,6 +1409,7 @@ const ( DefOptMPPOuterJoinFixedBuildSide = false DefOptWriteRowID = false DefOptEnableCorrelationAdjustment = true + DefOptFulltextSearchFallback = "like" DefOptLimitPushDownThreshold = 5000 DefOptCorrelationThreshold = 0.9 DefOptCorrelationExpFactor = 1 diff --git a/pkg/sessionctx/variable/session.go b/pkg/sessionctx/variable/session.go index bd39a712bfcc5..c8518d089991d 100644 --- a/pkg/sessionctx/variable/session.go +++ b/pkg/sessionctx/variable/session.go @@ -1019,6 +1019,9 @@ type SessionVars struct { // EnableCorrelationAdjustment is used to indicate if correlation adjustment is enabled. EnableCorrelationAdjustment bool + // FulltextSearchFallback controls the behavior when MATCH...AGAINST syntax is used. + // Options: 'like' (convert to LIKE predicates), 'error' (throw error if no fulltext index). + FulltextSearchFallback string // CorrelationExpFactor is used to control the heuristic approach of row count estimation when CorrelationThreshold is not met. 
CorrelationExpFactor int @@ -2276,6 +2279,7 @@ func NewSessionVars(hctx HookContext) *SessionVars { allowInSubqToJoinAndAgg: vardef.DefOptInSubqToJoinAndAgg, preferRangeScan: vardef.DefOptPreferRangeScan, EnableCorrelationAdjustment: vardef.DefOptEnableCorrelationAdjustment, + FulltextSearchFallback: vardef.DefOptFulltextSearchFallback, LimitPushDownThreshold: vardef.DefOptLimitPushDownThreshold, CorrelationThreshold: vardef.DefOptCorrelationThreshold, CorrelationExpFactor: vardef.DefOptCorrelationExpFactor, diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index 0de3f26abde8e..74d94462911e2 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -2229,6 +2229,10 @@ var defaultSysVars = []*SysVar{ s.EnableCorrelationAdjustment = TiDBOptOn(val) return nil }}, + {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptFulltextSearchFallback, Value: vardef.DefOptFulltextSearchFallback, Type: vardef.TypeEnum, PossibleValues: []string{"like", "error"}, SetSession: func(s *SessionVars, val string) error { + s.FulltextSearchFallback = val + return nil + }}, {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptCorrelationExpFactor, Value: strconv.Itoa(vardef.DefOptCorrelationExpFactor), Type: vardef.TypeUnsigned, MinValue: 0, MaxValue: math.MaxInt32, SetSession: func(s *SessionVars, val string) error { s.CorrelationExpFactor = int(TidbOptInt64(val, vardef.DefOptCorrelationExpFactor)) return nil diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test new file mode 100644 index 0000000000000..58c06b69014b3 --- /dev/null +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -0,0 +1,55 @@ +# Test cases for MATCH...AGAINST to LIKE conversion + +# Setup +set tidb_cost_model_version=1; +set @@tidb_opt_fulltext_search_fallback='like'; +drop table if exists articles; +create table articles (id int 
primary key, title varchar(200), body text); +insert into articles values + (1, 'MySQL Tutorial', 'This tutorial provides a basic MySQL tutorial'), + (2, 'How To Use MySQL Well', 'After you went through a MySQL tutorial'), + (3, 'Optimizing MySQL', 'In this tutorial we will show how to optimize MySQL'), + (4, 'MySQL vs. PostgreSQL', 'This article compares MySQL and PostgreSQL'), + (5, 'MySQL Security', 'How to secure your MySQL database'); + +# Test 1: Natural Language Mode - Single Column +select id, title from articles where match(title) against('MySQL tutorial'); + +# Test 2: Natural Language Mode - Multiple Columns +select id, title from articles where match(title, body) against('MySQL tutorial'); + +# Test 3: Boolean Mode - Required Terms +select id, title from articles where match(title) against('+MySQL +tutorial' in boolean mode); + +# Test 4: Boolean Mode - Excluded Terms +select id, title from articles where match(title) against('+MySQL -tutorial' in boolean mode); + +# Test 5: Boolean Mode - Prefix Wildcard +select id, title from articles where match(title) against('Optim*' in boolean mode); + +# Test 6: Boolean Mode - Exact Phrase +select id, title from articles where match(title, body) against('"MySQL tutorial"' in boolean mode); + +# Test 7: Boolean Mode - Complex Query +select id, title from articles where match(title, body) against('+MySQL +database -PostgreSQL' in boolean mode); + +# Test 8: Boolean Mode - Optional Terms +select id, title from articles where match(title) against('tutorial security' in boolean mode); + +# Test 9: Empty Search String +select id, title from articles where match(title) against(''); + +# Test 10: Test with error mode +set @@tidb_opt_fulltext_search_fallback='error'; +-- error 8200 +select id, title from articles where match(title) against('MySQL'); + +# Test 11: Switch back to like mode +set @@tidb_opt_fulltext_search_fallback='like'; +select id, title from articles where match(title) against('MySQL'); + +# Test 12: 
Natural Language Mode with single word +select id, title from articles where match(title) against('PostgreSQL'); + +# Cleanup +drop table if exists articles; From 4ded0d584c8cb9033e6e6caa06d03c2544c1fc95 Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 17 Jan 2026 20:44:46 -0800 Subject: [PATCH 02/42] build errors --- pkg/planner/core/expression_rewriter.go | 33 ++++---- pkg/planner/core/fulltext_to_like.go | 24 +++++- pkg/planner/core/fulltext_to_like_test.go | 43 ++++++++++ .../r/planner/core/fulltext_search.result | 83 +++++++++++++++++++ .../t/planner/core/fulltext_search.test | 26 +++++- 5 files changed, 187 insertions(+), 22 deletions(-) create mode 100644 tests/integrationtest/r/planner/core/fulltext_search.result diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 3d3b61d263bdd..9b84fe69ade5e 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2233,16 +2233,18 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { return } - // The Against expression has been visited and should be on the ctxStack - // Pop it from the stack + // Both the column expressions and Against expression have been visited + // and pushed onto the ctxStack. 
The stack layout is: + // [..., col1, col2, ..., colN, against] + numColumns := len(v.ColumnNames) l := len(er.ctxStack) - if l < 1 { - er.err = errors.Errorf("MATCH...AGAINST: expected Against expression on stack") + if l < numColumns+1 { + er.err = errors.Errorf("MATCH...AGAINST: expected %d column expressions and Against expression on stack, got %d", numColumns+1, l) return } + // The Against expression is the last one on the stack againstExpr := er.ctxStack[l-1] - er.ctxStackPop(1) // Check if it's a constant string constExpr, ok := againstExpr.(*expression.Constant) @@ -2262,21 +2264,16 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { return } - // Resolve column expressions - var columns []expression.Expression - for _, colName := range v.ColumnNames { - idx, err := expression.FindFieldName(er.names, colName) - if err != nil { - er.err = err - return - } - if idx < 0 { - er.err = errors.Errorf("Unknown column '%s' in MATCH...AGAINST", colName.Name.O) - return - } - columns = append(columns, er.schema.Columns[idx]) + // Get the column expressions from the stack + // They're at positions [l-numColumns-1 : l-1] + columns := make([]expression.Expression, numColumns) + for i := 0; i < numColumns; i++ { + columns[i] = er.ctxStack[l-numColumns-1+i] } + // Pop all column expressions and the Against expression + er.ctxStackPop(numColumns + 1) + // Convert to LIKE predicates result, err := er.convertMatchAgainstToLike(columns, searchText.GetString(), v.Modifier) if err != nil { diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 7654505c87ba2..2a442adb4f920 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -243,6 +243,21 @@ func (er *expressionRewriter) convertMatchAgainstToLike( return expression.ComposeDNFCondition(er.sctx, columnPredicates...), nil } +// escapeLikePattern escapes special LIKE characters (%, _, \) in the search term +// so they are 
treated as literal characters rather than wildcards +func escapeLikePattern(term string) string { + var result strings.Builder + result.Grow(len(term)) + for i := 0; i < len(term); i++ { + ch := term[i] + if ch == '\\' || ch == '%' || ch == '_' { + result.WriteByte('\\') + } + result.WriteByte(ch) + } + return result.String() +} + // buildLikePredicate builds a single LIKE predicate for a column and search term func (er *expressionRewriter) buildLikePredicate( column expression.Expression, @@ -251,17 +266,20 @@ func (er *expressionRewriter) buildLikePredicate( isPrefixMatch bool, isPhrase bool, ) (expression.Expression, error) { + // Escape special LIKE characters in the search term + escapedTerm := escapeLikePattern(term) + // Build the pattern var pattern string if isPhrase { // Exact phrase: %term% - pattern = "%" + term + "%" + pattern = "%" + escapedTerm + "%" } else if isPrefixMatch { // Prefix match: term% - pattern = term + "%" + pattern = escapedTerm + "%" } else { // General match: %term% - pattern = "%" + term + "%" + pattern = "%" + escapedTerm + "%" } // Create constant for pattern diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index 38d8675fb1527..72106c5373519 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -129,3 +129,46 @@ func TestParseSearchTerm(t *testing.T) { }) } } + +func TestEscapeLikePattern(t *testing.T) { + tests := []struct { + input string + expected string + }{ + { + input: "normal text", + expected: "normal text", + }, + { + input: "100%", + expected: "100\\%", + }, + { + input: "test_file", + expected: "test\\_file", + }, + { + input: "path\\to\\file", + expected: "path\\\\to\\\\file", + }, + { + input: "mix_%_all", + expected: "mix\\_\\%\\_all", + }, + { + input: "\\%_", + expected: "\\\\\\%\\_", + }, + { + input: "", + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := 
escapeLikePattern(tt.input) + require.Equal(t, tt.expected, result, "Escaped pattern should match") + }) + } +} diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result new file mode 100644 index 0000000000000..8e20b7e5fc06f --- /dev/null +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -0,0 +1,83 @@ +set tidb_cost_model_version=1; +set @@tidb_opt_fulltext_search_fallback='like'; +drop table if exists articles; +create table articles (id int primary key, title varchar(200), body text); +insert into articles values +(1, 'MySQL Tutorial', 'This tutorial provides a basic MySQL tutorial'), +(2, 'How To Use MySQL Well', 'After you went through a MySQL tutorial'), +(3, 'Optimizing MySQL', 'In this tutorial we will show how to optimize MySQL'), +(4, 'MySQL vs. PostgreSQL', 'This article compares MySQL and PostgreSQL'), +(5, 'MySQL Security', 'How to secure your MySQL database'); +select id, title from articles where match(title) against('MySQL tutorial'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title, body) against('MySQL tutorial'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('+MySQL +tutorial' in boolean mode); +id title +select id, title from articles where match(title) against('+MySQL -tutorial' in boolean mode); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. 
PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('Optim*' in boolean mode); +id title +3 Optimizing MySQL +select id, title from articles where match(title, body) against('"MySQL tutorial"' in boolean mode); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +select id, title from articles where match(title, body) against('+MySQL +database -PostgreSQL' in boolean mode); +id title +5 MySQL Security +select id, title from articles where match(title) against('tutorial security' in boolean mode); +id title +select id, title from articles where match(title) against(''); +id title +set @@tidb_opt_fulltext_search_fallback='error'; +select id, title from articles where match(title) against('MySQL'); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST without fulltext index' +set @@tidb_opt_fulltext_search_fallback='like'; +select id, title from articles where match(title) against('MySQL'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('PostgreSQL'); +id title +4 MySQL vs. 
PostgreSQL +drop table if exists special_chars; +create table special_chars (id int primary key, content varchar(200)); +insert into special_chars values +(1, 'Progress is at 100%'), +(2, 'Progress is at 50%'), +(3, 'File name is test_file.txt'), +(4, 'Path is C:\\Windows\\System32'), +(5, 'Normal text without special chars'); +select id, content from special_chars where match(content) against('100%'); +id content +1 Progress is at 100% +select id, content from special_chars where match(content) against('test_file'); +id content +3 File name is test_file.txt +select id, content from special_chars where match(content) against('C:\\Windows'); +id content +4 Path is C:\Windows\System32 +select id, content from special_chars where match(content) against('+100% +Progress' in boolean mode); +id content +1 Progress is at 100% +drop table if exists special_chars; +drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index 58c06b69014b3..ebf01a510c153 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -41,7 +41,7 @@ select id, title from articles where match(title) against(''); # Test 10: Test with error mode set @@tidb_opt_fulltext_search_fallback='error'; --- error 8200 +-- error 1235 select id, title from articles where match(title) against('MySQL'); # Test 11: Switch back to like mode @@ -51,5 +51,29 @@ select id, title from articles where match(title) against('MySQL'); # Test 12: Natural Language Mode with single word select id, title from articles where match(title) against('PostgreSQL'); +# Test 13: Test escaping of special LIKE characters +drop table if exists special_chars; +create table special_chars (id int primary key, content varchar(200)); +insert into special_chars values + (1, 'Progress is at 100%'), + (2, 'Progress is at 50%'), + (3, 'File name is test_file.txt'), 
+ (4, 'Path is C:\\Windows\\System32'), + (5, 'Normal text without special chars'); + +# Test searching for literal % character +select id, content from special_chars where match(content) against('100%'); + +# Test searching for literal _ character +select id, content from special_chars where match(content) against('test_file'); + +# Test searching for literal \ character +select id, content from special_chars where match(content) against('C:\\Windows'); + +# Test Boolean mode with special characters +select id, content from special_chars where match(content) against('+100% +Progress' in boolean mode); + +drop table if exists special_chars; + # Cleanup drop table if exists articles; From 50510499fb74cb20c4e47a8ca8ace467a4f1ac86 Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 17 Jan 2026 21:03:50 -0800 Subject: [PATCH 03/42] build errors2 --- pkg/planner/core/expression_rewriter.go | 2 +- pkg/planner/core/fulltext_to_like.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 9b84fe69ade5e..eef6b22e462fd 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2267,7 +2267,7 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { // Get the column expressions from the stack // They're at positions [l-numColumns-1 : l-1] columns := make([]expression.Expression, numColumns) - for i := 0; i < numColumns; i++ { + for i := range numColumns { columns[i] = er.ctxStack[l-numColumns-1+i] } diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 2a442adb4f920..e7d4db913f970 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -248,7 +248,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( func escapeLikePattern(term string) string { var result strings.Builder result.Grow(len(term)) - for i := 0; i < len(term); i++ 
{ + for i := range len(term) { ch := term[i] if ch == '\\' || ch == '%' || ch == '_' { result.WriteByte('\\') From a11e4363d3781e40b82924451f6c20afb713a5a1 Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 17 Jan 2026 22:24:23 -0800 Subject: [PATCH 04/42] testcase1 --- pkg/expression/integration_test/integration_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index 3f7309d141b36..f6b2f43055007 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -207,7 +207,12 @@ func TestFTSSyntax(t *testing.T) { // tk.MustContainErrMsg("select * from t where (fts_match_word('hello', title)) > 0", "Currently 'FTS_MATCH_WORD()' must be used alone") // tk.MustContainErrMsg("select (fts_match_word('hello', title)) AS score from t where fts_match_word('hello', title)", "Currently 'FTS_MATCH_WORD()' cannot be used in SELECT fields") tk.MustContainErrMsg("select * from t where match() against ('hello')", `You have an error in your SQL syntax`) - tk.MustContainErrMsg("select * from t where match(title) against ('hello' in boolean mode)", `UnknownType: *ast.MatchAgainst`) + // Test MATCH...AGAINST with default 'like' fallback mode - should succeed + tk.MustQuery("select * from t where match(title) against ('hello' in boolean mode)") + // Test MATCH...AGAINST with 'error' fallback mode - should fail + tk.MustExec("set @@tidb_opt_fulltext_search_fallback='error'") + tk.MustContainErrMsg("select * from t where match(title) against ('hello' in boolean mode)", `This version of TiDB doesn't yet support 'MATCH...AGAINST without fulltext index'`) + tk.MustExec("set @@tidb_opt_fulltext_search_fallback='like'") tk.MustContainErrMsg("select * from t where fts_match_word(title, body)", `match against a non-constant string`) tk.MustContainErrMsg("select * from t where fts_match_word(45.67, body)", 
`match against a non-constant string`) tk.MustContainErrMsg("select * from t where fts_match_word('hello', title, body)", `Incorrect parameter count in the call to native function`) From 3379ed59c33bf9672d01c2693fbe2eff77ebeed5 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 09:05:44 -0800 Subject: [PATCH 05/42] testcase2 --- pkg/planner/core/fulltext_to_like_test.go | 21 +++++++++++++ .../r/planner/core/fulltext_search.result | 30 +++++++++++++++++++ .../t/planner/core/fulltext_search.test | 26 ++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index 72106c5373519..237cb850f2f58 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -75,6 +75,19 @@ func TestParseBooleanSearchString(t *testing.T) { {word: "word2", isExcluded: true}, }, }, + { + input: `"unclosed quote`, + expected: []searchTerm{ + {word: "unclosed quote", isPhrase: true}, + }, + }, + { + input: "word1\t\nword2", + expected: []searchTerm{ + {word: "word1"}, + {word: "word2"}, + }, + }, } for _, tt := range tests { @@ -117,6 +130,14 @@ func TestParseSearchTerm(t *testing.T) { input: "word", expected: searchTerm{word: "word"}, }, + { + input: "", + expected: searchTerm{word: ""}, + }, + { + input: "+*", + expected: searchTerm{word: "", isRequired: true, isPrefixMatch: true}, + }, } for _, tt := range tests { diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index 8e20b7e5fc06f..9be070a4424a2 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -80,4 +80,34 @@ select id, content from special_chars where match(content) against('+100% +Progr id content 1 Progress is at 100% drop table if exists special_chars; +select id, title from articles where match(title) 
against('-PostgreSQL -Security' in boolean mode); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); +id title +select id, title from articles where match(title) against('+MySQL +tutorial +-Security' in boolean mode); +id title +select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); +id title +select id, title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); +id title +1 MySQL Tutorial +3 Optimizing MySQL +select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); +id title +2 How To Use MySQL Well +3 Optimizing MySQL +select id, title from articles where match(title) against(' +'); +id title +select id, title from articles where match(title) against('MySQL tutorial PostgreSQL'); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. 
PostgreSQL +5 MySQL Security drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index ebf01a510c153..a68d1b7a2f051 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -75,5 +75,31 @@ select id, content from special_chars where match(content) against('+100% +Progr drop table if exists special_chars; +# Test 14: Boolean mode - only excluded terms (no required/optional) +select id, title from articles where match(title) against('-PostgreSQL -Security' in boolean mode); + +# Test 15: Boolean mode - unclosed quote (should treat as phrase) +select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); + +# Test 16: Boolean mode - mixed whitespace (tabs and newlines) +select id, title from articles where match(title) against('+MySQL +tutorial +-Security' in boolean mode); + +# Test 17: Boolean mode - empty word after operator removal (+* should be ignored) +select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); + +# Test 18: Boolean mode - multiple excluded terms +select id, title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); + +# Test 19: Boolean mode - all term types combined +select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); + +# Test 20: Natural language mode - only whitespace +select id, title from articles where match(title) against(' + '); + +# Test 21: Natural language mode - multiple spaces between words +select id, title from articles where match(title) against('MySQL tutorial PostgreSQL'); + # Cleanup drop table if exists articles; From ba0f3d7db08bb2e906f20d818f0d9f18398eb2a3 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 09:56:43 -0800 Subject: [PATCH 06/42] 
review1 --- pkg/planner/core/fulltext_to_like.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index e7d4db913f970..051fbed2f7a09 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -271,14 +271,11 @@ func (er *expressionRewriter) buildLikePredicate( // Build the pattern var pattern string - if isPhrase { - // Exact phrase: %term% - pattern = "%" + escapedTerm + "%" - } else if isPrefixMatch { + if isPrefixMatch { // Prefix match: term% pattern = escapedTerm + "%" } else { - // General match: %term% + // General match (words) or exact phrase: %term% pattern = "%" + escapedTerm + "%" } From 82416d87017ce0925727eae5f0bf80db385bcfae Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 10:59:17 -0800 Subject: [PATCH 07/42] review2 --- pkg/planner/core/fulltext_to_like.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 051fbed2f7a09..b6fd8d884a143 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -171,7 +171,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( // AND all required terms for _, term := range required { - pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch, term.isPhrase) + pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch) if err != nil { return nil, err } @@ -180,7 +180,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( // AND NOT all excluded terms for _, term := range excluded { - pred, err := er.buildLikePredicate(column, term.word, true, term.isPrefixMatch, term.isPhrase) + pred, err := er.buildLikePredicate(column, term.word, true, term.isPrefixMatch) if err != nil { return nil, err } @@ -191,7 +191,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( if 
len(optional) > 0 { var optionalPreds []expression.Expression for _, term := range optional { - pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch, term.isPhrase) + pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch) if err != nil { return nil, err } @@ -220,7 +220,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( for _, column := range columns { var wordPredicates []expression.Expression for _, word := range words { - pred, err := er.buildLikePredicate(column, word, false, false, false) + pred, err := er.buildLikePredicate(column, word, false, false) if err != nil { return nil, err } @@ -264,7 +264,6 @@ func (er *expressionRewriter) buildLikePredicate( term string, isNegated bool, isPrefixMatch bool, - isPhrase bool, ) (expression.Expression, error) { // Escape special LIKE characters in the search term escapedTerm := escapeLikePattern(term) From f7b1fa58e7672a30f47033fbfaa586e904342a66 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 11:45:14 -0800 Subject: [PATCH 08/42] review3 --- pkg/planner/core/fulltext_to_like.go | 107 +++++++++++++++++++-------- 1 file changed, 78 insertions(+), 29 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index b6fd8d884a143..89675b9ec73f2 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -46,6 +46,11 @@ func parseBooleanSearchString(text string) []searchTerm { case '"': if inQuote { // End of phrase + // NOTE: Phrase matching in MySQL full-text search finds the exact phrase as a sequence + // of words (word boundaries are enforced). Using LIKE %phrase%, we cannot perfectly + // enforce word boundaries without REGEXP. For example, "quick brown" would match + // "aquick brownie" which MySQL full-text search would not match. This is an acceptable + // limitation for a fallback implementation. 
phrase := current.String() if phrase != "" { terms = append(terms, searchTerm{ @@ -121,6 +126,22 @@ func parseSearchTerm(word string) searchTerm { } // convertMatchAgainstToLike converts a MATCH...AGAINST expression to LIKE predicates +// +// This is a fallback implementation since TiDB does not natively support full-text search. +// It provides basic text matching capabilities but has the following semantic differences +// from MySQL's full-text search: +// +// 1. No relevance scoring - returns 1 for match, 0 for no match (MySQL returns a relevance score) +// 2. No stop word filtering - searches for all words regardless of length or commonness +// 3. No word length limits - MySQL ignores words shorter than ft_min_word_len (default 4) +// 4. No word boundaries - LIKE %word% matches within words (e.g., "cat" matches "concatenate") +// - Affects prefix wildcard: "Optim*" matches "reOptimizing" (MySQL would not match) +// - Affects phrase matching: "quick brown" matches "aquick brownie" (MySQL would not match) +// 5. Case sensitivity - follows column collation (MySQL full-text search is case-insensitive) +// 6. Performance - LIKE predicates cannot use full-text indexes (much slower on large datasets) +// +// Supported Boolean mode operators: + (required), - (excluded), * (prefix wildcard), "..." 
(phrase) +// Unsupported operators: ~ (negation with ranking), > < (relevance modifiers), () (grouping) func (er *expressionRewriter) convertMatchAgainstToLike( columns []expression.Expression, searchText string, @@ -165,48 +186,75 @@ func (er *expressionRewriter) convertMatchAgainstToLike( } } - // Build predicates for each column - for _, column := range columns { - var predicates []expression.Expression + // Build predicates with correct Boolean logic for multiple columns + // In MySQL, MATCH(col1, col2) AGAINST('+word1 +word2') means: + // - word1 must appear in (col1 OR col2) + // - word2 must appear in (col1 OR col2) + var allPredicates []expression.Expression - // AND all required terms - for _, term := range required { + // For each required term: (col1 LIKE %term% OR col2 LIKE %term%) + for _, term := range required { + var termColumnPreds []expression.Expression + for _, column := range columns { pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch) if err != nil { return nil, err } - predicates = append(predicates, pred) + termColumnPreds = append(termColumnPreds, pred) + } + // At least one column must match this required term + if len(termColumnPreds) > 0 { + allPredicates = append(allPredicates, expression.ComposeDNFCondition(er.sctx, termColumnPreds...)) } + } - // AND NOT all excluded terms - for _, term := range excluded { - pred, err := er.buildLikePredicate(column, term.word, true, term.isPrefixMatch) + // For each excluded term: NOT(col1 LIKE %term% OR col2 LIKE %term%) + for _, term := range excluded { + var termColumnPreds []expression.Expression + for _, column := range columns { + pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch) + if err != nil { + return nil, err + } + termColumnPreds = append(termColumnPreds, pred) + } + // None of the columns should match this excluded term + if len(termColumnPreds) > 0 { + notPred, err := er.newFunction(ast.UnaryNot, 
types.NewFieldType(mysql.TypeTiny), + expression.ComposeDNFCondition(er.sctx, termColumnPreds...)) if err != nil { return nil, err } - predicates = append(predicates, pred) + allPredicates = append(allPredicates, notPred) } + } - // OR all optional terms (if any) - if len(optional) > 0 { - var optionalPreds []expression.Expression - for _, term := range optional { + // For optional terms: OR across all term-column combinations + if len(optional) > 0 { + var allOptionalPreds []expression.Expression + for _, term := range optional { + for _, column := range columns { pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch) if err != nil { return nil, err } - optionalPreds = append(optionalPreds, pred) - } - if len(optionalPreds) > 0 { - predicates = append(predicates, expression.ComposeDNFCondition(er.sctx, optionalPreds...)) + allOptionalPreds = append(allOptionalPreds, pred) } } - - // If we have any predicates for this column, combine them with AND - if len(predicates) > 0 { - columnPredicates = append(columnPredicates, expression.ComposeCNFCondition(er.sctx, predicates...)) + if len(allOptionalPreds) > 0 { + allPredicates = append(allPredicates, expression.ComposeDNFCondition(er.sctx, allOptionalPreds...)) } } + + // AND all predicates together + if len(allPredicates) == 0 { + return &expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + + return expression.ComposeCNFCondition(er.sctx, allPredicates...), nil } else { // Natural Language Mode: split into words and OR them together words := strings.Fields(searchText) @@ -269,14 +317,15 @@ func (er *expressionRewriter) buildLikePredicate( escapedTerm := escapeLikePattern(term) // Build the pattern + // NOTE: Prefix matching (word*) in MySQL full-text search matches words that START with + // the prefix, but the word can appear anywhere in the text. For example, "Optim*" should + // match "Optimizing MySQL" but NOT "reOptimizing". 
Using LIKE without REGEXP, we cannot + // perfectly enforce word-start boundaries. We use %term% which may produce false positives + // (matching mid-word like "reOptimizing"), but avoids false negatives. This is an acceptable + // limitation for a fallback implementation. var pattern string - if isPrefixMatch { - // Prefix match: term% - pattern = escapedTerm + "%" - } else { - // General match (words) or exact phrase: %term% - pattern = "%" + escapedTerm + "%" - } + // Both prefix and general matches use %term% to find the term anywhere in text + pattern = "%" + escapedTerm + "%" // Create constant for pattern patternConst := &expression.Constant{ From c63c6bb5c7bfadf08f4bf5c891ba786dcd69a114 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 12:10:50 -0800 Subject: [PATCH 09/42] review4 --- pkg/planner/core/fulltext_to_like.go | 50 ++++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 89675b9ec73f2..3adaf6d5e0dff 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -135,8 +135,9 @@ func parseSearchTerm(word string) searchTerm { // 2. No stop word filtering - searches for all words regardless of length or commonness // 3. No word length limits - MySQL ignores words shorter than ft_min_word_len (default 4) // 4. No word boundaries - LIKE %word% matches within words (e.g., "cat" matches "concatenate") -// - Affects prefix wildcard: "Optim*" matches "reOptimizing" (MySQL would not match) -// - Affects phrase matching: "quick brown" matches "aquick brownie" (MySQL would not match) +// - Affects prefix wildcard: "Optim*" matches "reOptimizing" (MySQL would not match) +// - Affects phrase matching: "quick brown" matches "aquick brownie" (MySQL would not match) +// // 5. Case sensitivity - follows column collation (MySQL full-text search is case-insensitive) // 6. 
Performance - LIKE predicates cannot use full-text indexes (much slower on large datasets) // @@ -255,28 +256,28 @@ func (er *expressionRewriter) convertMatchAgainstToLike( } return expression.ComposeCNFCondition(er.sctx, allPredicates...), nil - } else { - // Natural Language Mode: split into words and OR them together - words := strings.Fields(searchText) - if len(words) == 0 { - return &expression.Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - }, nil - } + } - for _, column := range columns { - var wordPredicates []expression.Expression - for _, word := range words { - pred, err := er.buildLikePredicate(column, word, false, false) - if err != nil { - return nil, err - } - wordPredicates = append(wordPredicates, pred) - } - if len(wordPredicates) > 0 { - columnPredicates = append(columnPredicates, expression.ComposeDNFCondition(er.sctx, wordPredicates...)) + // Natural Language Mode: split into words and OR them together + words := strings.Fields(searchText) + if len(words) == 0 { + return &expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + + for _, column := range columns { + var wordPredicates []expression.Expression + for _, word := range words { + pred, err := er.buildLikePredicate(column, word, false, false) + if err != nil { + return nil, err } + wordPredicates = append(wordPredicates, pred) + } + if len(wordPredicates) > 0 { + columnPredicates = append(columnPredicates, expression.ComposeDNFCondition(er.sctx, wordPredicates...)) } } @@ -311,7 +312,7 @@ func (er *expressionRewriter) buildLikePredicate( column expression.Expression, term string, isNegated bool, - isPrefixMatch bool, + _ bool, ) (expression.Expression, error) { // Escape special LIKE characters in the search term escapedTerm := escapeLikePattern(term) @@ -323,9 +324,8 @@ func (er *expressionRewriter) buildLikePredicate( // perfectly enforce word-start boundaries. 
We use %term% which may produce false positives // (matching mid-word like "reOptimizing"), but avoids false negatives. This is an acceptable // limitation for a fallback implementation. - var pattern string // Both prefix and general matches use %term% to find the term anywhere in text - pattern = "%" + escapedTerm + "%" + pattern := "%" + escapedTerm + "%" // Create constant for pattern patternConst := &expression.Constant{ From 0c74944bc05e64879d1fea5957e2fa414141c53e Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 12:46:12 -0800 Subject: [PATCH 10/42] review5 --- pkg/planner/core/expression_rewriter.go | 20 ++- pkg/planner/core/fulltext_to_like.go | 117 +++++++++++++++++- pkg/planner/core/fulltext_to_like_test.go | 29 +++++ .../r/planner/core/fulltext_search.result | 12 ++ .../t/planner/core/fulltext_search.test | 15 +++ 5 files changed, 186 insertions(+), 7 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index eef6b22e462fd..03414a43626b7 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2220,7 +2220,24 @@ func (er *expressionRewriter) patternLikeOrIlikeToExpression(v *ast.PatternLikeO } func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { - // Check the session variable to determine behavior + // Check if a fulltext index exists for the given columns + var hasIndex bool + if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.is != nil { + var err error + hasIndex, err = hasFulltextIndex(er.planCtx.builder.is, v.ColumnNames) + if err != nil { + er.err = err + return + } + } + + // If a fulltext index exists, TiDB doesn't support it yet + if hasIndex { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with fulltext index (native fulltext search not supported)") + return + } + + // No fulltext index exists - check fallback mode var fallbackMode string if er.planCtx != nil 
&& er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { fallbackMode = er.planCtx.builder.ctx.GetSessionVars().FulltextSearchFallback @@ -2233,6 +2250,7 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { return } + // Fallback mode is "like" - convert to LIKE predicates // Both the column expressions and Against expression have been visited // and pushed onto the ctxStack. The stack layout is: // [..., col1, col2, ..., colN, against] diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 3adaf6d5e0dff..cb347b5ad6435 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -15,9 +15,12 @@ package core import ( + "slices" "strings" "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/infoschema" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/types" @@ -37,6 +40,8 @@ func parseBooleanSearchString(text string) []searchTerm { var terms []searchTerm var current strings.Builder inQuote := false + phraseIsRequired := false + phraseIsExcluded := false i := 0 for i < len(text) { @@ -54,13 +59,29 @@ func parseBooleanSearchString(text string) []searchTerm { phrase := current.String() if phrase != "" { terms = append(terms, searchTerm{ - word: phrase, - isPhrase: true, + word: phrase, + isRequired: phraseIsRequired, + isExcluded: phraseIsExcluded, + isPhrase: true, }) } current.Reset() inQuote = false + phraseIsRequired = false + phraseIsExcluded = false } else { + // Check for leading operator before the quote (e.g., +"phrase" or -"phrase") + if current.Len() > 0 { + prefix := current.String() + if len(prefix) > 0 { + if prefix[0] == '+' { + phraseIsRequired = true + } else if prefix[0] == '-' { + phraseIsExcluded = true + } + } + current.Reset() + } // Start of phrase inQuote = true } @@ -125,6 +146,75 @@ func parseSearchTerm(word 
string) searchTerm { return term } +// hasFulltextIndex checks if a fulltext index exists for the given columns +func hasFulltextIndex(is infoschema.InfoSchema, columnNames []*ast.ColumnName) (bool, error) { + if len(columnNames) == 0 { + return false, nil + } + + // All columns in a MATCH clause must be from the same table + // Get the schema and table from the first column + schema := columnNames[0].Schema + tableName := columnNames[0].Table + + // If schema is not specified, we cannot determine the table + // In this case, we'll need to check later during execution + if tableName.L == "" { + return false, nil + } + + // Get the table from info schema + tbl, err := is.TableByName(nil, schema, tableName) + if err != nil { + // Table not found, cannot check for fulltext index + return false, nil + } + + tblInfo := tbl.Meta() + if tblInfo == nil { + return false, nil + } + + // Extract column names from the MATCH clause + matchColumns := make([]string, len(columnNames)) + for i, col := range columnNames { + matchColumns[i] = col.Name.L // Use lowercase for case-insensitive comparison + } + + // Check each index to see if it's a fulltext index covering the exact columns + for _, idx := range tblInfo.Indices { + // Check if this is a fulltext index + if idx.Tp != ast.IndexTypeFulltext { + continue + } + + // Check if the index is in a usable state + if idx.State != model.StatePublic { + continue + } + + // Check if the index covers the exact set of columns + if len(idx.Columns) != len(matchColumns) { + continue + } + + // Extract index column names + idxColumns := make([]string, len(idx.Columns)) + for i, col := range idx.Columns { + idxColumns[i] = col.Name.L + } + + // Check if the columns match (order doesn't matter for MATCH...AGAINST) + slices.Sort(matchColumns) + slices.Sort(idxColumns) + if slices.Equal(matchColumns, idxColumns) { + return true, nil + } + } + + return false, nil +} + // convertMatchAgainstToLike converts a MATCH...AGAINST expression to LIKE 
predicates // // This is a fallback implementation since TiDB does not natively support full-text search. @@ -134,9 +224,14 @@ func parseSearchTerm(word string) searchTerm { // 1. No relevance scoring - returns 1 for match, 0 for no match (MySQL returns a relevance score) // 2. No stop word filtering - searches for all words regardless of length or commonness // 3. No word length limits - MySQL ignores words shorter than ft_min_word_len (default 4) -// 4. No word boundaries - LIKE %word% matches within words (e.g., "cat" matches "concatenate") -// - Affects prefix wildcard: "Optim*" matches "reOptimizing" (MySQL would not match) -// - Affects phrase matching: "quick brown" matches "aquick brownie" (MySQL would not match) +// 4. No word boundaries - LIKE %term% matches substrings anywhere, not just complete words +// - Simple terms: "cat" matches "concatenate", "category", "application" +// (MySQL FTS only matches "cat" as a standalone word) +// - Prefix wildcard: "Optim*" matches "reOptimizing", "Optimizing" +// (MySQL FTS only matches words starting with "Optim" like "Optimizing", not "reOptimizing") +// - Phrase matching: "quick brown" matches "aquick brownie" +// (MySQL FTS only matches the exact phrase with word boundaries) +// This limitation exists because LIKE cannot enforce word boundaries without REGEXP // // 5. Case sensitivity - follows column collation (MySQL full-text search is case-insensitive) // 6. 
Performance - LIKE predicates cannot use full-text indexes (much slower on large datasets) @@ -295,8 +390,18 @@ func (er *expressionRewriter) convertMatchAgainstToLike( // escapeLikePattern escapes special LIKE characters (%, _, \) in the search term // so they are treated as literal characters rather than wildcards func escapeLikePattern(term string) string { + // Count special characters to pre-allocate the exact buffer size needed + escapeCount := 0 + for i := range len(term) { + ch := term[i] + if ch == '\\' || ch == '%' || ch == '_' { + escapeCount++ + } + } + + // Allocate exact size: original length + number of escape characters var result strings.Builder - result.Grow(len(term)) + result.Grow(len(term) + escapeCount) for i := range len(term) { ch := term[i] if ch == '\\' || ch == '%' || ch == '_' { diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index 237cb850f2f58..c3b2a93ede173 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -88,6 +88,35 @@ func TestParseBooleanSearchString(t *testing.T) { {word: "word2"}, }, }, + { + input: `+"required phrase"`, + expected: []searchTerm{ + {word: "required phrase", isRequired: true, isPhrase: true}, + }, + }, + { + input: `-"excluded phrase"`, + expected: []searchTerm{ + {word: "excluded phrase", isExcluded: true, isPhrase: true}, + }, + }, + { + input: `+"required phrase" optional -"excluded phrase"`, + expected: []searchTerm{ + {word: "required phrase", isRequired: true, isPhrase: true}, + {word: "optional"}, + {word: "excluded phrase", isExcluded: true, isPhrase: true}, + }, + }, + { + input: `+word1 +"required phrase" -word2 -"excluded phrase"`, + expected: []searchTerm{ + {word: "word1", isRequired: true}, + {word: "required phrase", isRequired: true, isPhrase: true}, + {word: "word2", isExcluded: true}, + {word: "excluded phrase", isExcluded: true, isPhrase: true}, + }, + }, } for _, tt := range tests { diff 
--git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index 9be070a4424a2..694224201b9e3 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -110,4 +110,16 @@ id title 3 Optimizing MySQL 4 MySQL vs. PostgreSQL 5 MySQL Security +select id, title from articles where match(title) against('+"MySQL Tutorial"' in boolean mode); +id title +1 MySQL Tutorial +select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); +id title +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); +id title +2 How To Use MySQL Well drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index a68d1b7a2f051..8ff4c7a559897 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -101,5 +101,20 @@ select id, title from articles where match(title) against(' # Test 21: Natural language mode - multiple spaces between words select id, title from articles where match(title) against('MySQL tutorial PostgreSQL'); +# Test 22: Boolean mode - required phrase with + operator +select id, title from articles where match(title) against('+"MySQL Tutorial"' in boolean mode); + +# Test 23: Boolean mode - excluded phrase with - operator +select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); + +# Test 24: Boolean mode - mix of required/excluded phrases and words +select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); + +# NOTE: An additional test would verify that MATCH...AGAINST returns an error 
when a fulltext +# index exists (since TiDB doesn't natively support fulltext search yet). However, we cannot +# test this in the integration test environment because creating fulltext indexes requires +# TiFlash, which is not available in the test setup. The code properly checks for fulltext +# indexes and returns an error when one is found (see expression_rewriter.go:matchAgainstToExpression). + # Cleanup drop table if exists articles; From dbdbce177c64bed96dc3eb11629471dcc5e7f9a1 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 15:15:41 -0800 Subject: [PATCH 11/42] review6 --- pkg/planner/core/fulltext_to_like.go | 41 +++++++++++-------- pkg/planner/core/fulltext_to_like_test.go | 28 +++++++++++++ .../r/planner/core/fulltext_search.result | 7 ++++ 3 files changed, 60 insertions(+), 16 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index cb347b5ad6435..68170b14fbe57 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -73,12 +73,15 @@ func parseBooleanSearchString(text string) []searchTerm { // Check for leading operator before the quote (e.g., +"phrase" or -"phrase") if current.Len() > 0 { prefix := current.String() - if len(prefix) > 0 { - if prefix[0] == '+' { - phraseIsRequired = true - } else if prefix[0] == '-' { - phraseIsExcluded = true - } + // Only extract operator if prefix is exactly "+" or "-" + // Otherwise, treat it as a regular word + if prefix == "+" { + phraseIsRequired = true + } else if prefix == "-" { + phraseIsExcluded = true + } else { + // Not an operator, parse as a regular word first + terms = append(terms, parseSearchTerm(prefix)) } current.Reset() } @@ -105,10 +108,12 @@ func parseBooleanSearchString(text string) []searchTerm { // Handle remaining content if current.Len() > 0 { if inQuote { - // Unclosed quote, treat as phrase + // Unclosed quote, treat as phrase and preserve operator flags terms = append(terms, searchTerm{ - 
word: current.String(), - isPhrase: true, + word: current.String(), + isRequired: phraseIsRequired, + isExcluded: phraseIsExcluded, + isPhrase: true, }) } else { word := current.String() @@ -180,6 +185,8 @@ func hasFulltextIndex(is infoschema.InfoSchema, columnNames []*ast.ColumnName) ( for i, col := range columnNames { matchColumns[i] = col.Name.L // Use lowercase for case-insensitive comparison } + // Sort once outside the loop for efficiency + slices.Sort(matchColumns) // Check each index to see if it's a fulltext index covering the exact columns for _, idx := range tblInfo.Indices { @@ -198,15 +205,14 @@ func hasFulltextIndex(is infoschema.InfoSchema, columnNames []*ast.ColumnName) ( continue } - // Extract index column names + // Extract index column names and sort idxColumns := make([]string, len(idx.Columns)) for i, col := range idx.Columns { idxColumns[i] = col.Name.L } + slices.Sort(idxColumns) // Check if the columns match (order doesn't matter for MATCH...AGAINST) - slices.Sort(matchColumns) - slices.Sort(idxColumns) if slices.Equal(matchColumns, idxColumns) { return true, nil } @@ -325,8 +331,10 @@ func (er *expressionRewriter) convertMatchAgainstToLike( } } - // For optional terms: OR across all term-column combinations - if len(optional) > 0 { + // For optional terms: + // - If there are required/excluded terms, ignore optional terms (we can't rank in LIKE fallback) + // - If there are ONLY optional terms, require at least one to match + if len(optional) > 0 && len(required) == 0 && len(excluded) == 0 { var allOptionalPreds []expression.Expression for _, term := range optional { for _, column := range columns { @@ -338,11 +346,12 @@ func (er *expressionRewriter) convertMatchAgainstToLike( } } if len(allOptionalPreds) > 0 { - allPredicates = append(allPredicates, expression.ComposeDNFCondition(er.sctx, allOptionalPreds...)) + // When there are only optional terms, at least one must match + return expression.ComposeDNFCondition(er.sctx, 
allOptionalPreds...), nil } } - // AND all predicates together + // AND all required/excluded predicates together if len(allPredicates) == 0 { return &expression.Constant{ Value: types.NewIntDatum(0), diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index c3b2a93ede173..9b5b350bd4f4c 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -117,6 +117,34 @@ func TestParseBooleanSearchString(t *testing.T) { {word: "excluded phrase", isExcluded: true, isPhrase: true}, }, }, + { + input: `abc"phrase"`, + expected: []searchTerm{ + {word: "abc"}, + {word: "phrase", isPhrase: true}, + }, + }, + { + input: `word1 abc"phrase" word2`, + expected: []searchTerm{ + {word: "word1"}, + {word: "abc"}, + {word: "phrase", isPhrase: true}, + {word: "word2"}, + }, + }, + { + input: `+"unclosed`, + expected: []searchTerm{ + {word: "unclosed", isRequired: true, isPhrase: true}, + }, + }, + { + input: `-"unclosed phrase`, + expected: []searchTerm{ + {word: "unclosed phrase", isExcluded: true, isPhrase: true}, + }, + }, } for _, tt := range tests { diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index 694224201b9e3..d397370c7da02 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -92,14 +92,21 @@ select id, title from articles where match(title) against('+MySQL +tutorial id title select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. 
PostgreSQL +5 MySQL Security select id, title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); id title 1 MySQL Tutorial 3 Optimizing MySQL select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); id title +1 MySQL Tutorial 2 How To Use MySQL Well 3 Optimizing MySQL +4 MySQL vs. PostgreSQL select id, title from articles where match(title) against(' '); id title From dc10c79a7c083b9361e11208deb9e991b3fa27b3 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 15:51:22 -0800 Subject: [PATCH 12/42] review7 --- pkg/planner/core/fulltext_to_like.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 68170b14fbe57..d10f5b6c3e1a1 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -109,12 +109,15 @@ func parseBooleanSearchString(text string) []searchTerm { if current.Len() > 0 { if inQuote { // Unclosed quote, treat as phrase and preserve operator flags - terms = append(terms, searchTerm{ - word: current.String(), - isRequired: phraseIsRequired, - isExcluded: phraseIsExcluded, - isPhrase: true, - }) + phrase := current.String() + if phrase != "" { + terms = append(terms, searchTerm{ + word: phrase, + isRequired: phraseIsRequired, + isExcluded: phraseIsExcluded, + isPhrase: true, + }) + } } else { word := current.String() terms = append(terms, parseSearchTerm(word)) @@ -237,7 +240,7 @@ func hasFulltextIndex(is infoschema.InfoSchema, columnNames []*ast.ColumnName) ( // (MySQL FTS only matches words starting with "Optim" like "Optimizing", not "reOptimizing") // - Phrase matching: "quick brown" matches "aquick brownie" // (MySQL FTS only matches the exact phrase with word boundaries) -// This limitation exists because LIKE cannot enforce word boundaries without REGEXP +// This limitation exists because 
LIKE cannot enforce word boundaries without REGEXP // // 5. Case sensitivity - follows column collation (MySQL full-text search is case-insensitive) // 6. Performance - LIKE predicates cannot use full-text indexes (much slower on large datasets) From 48373d1b027afd67e9b71b4f50df7280245e538f Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 17:29:42 -0800 Subject: [PATCH 13/42] review8 --- .../integration_test/integration_test.go | 2 +- pkg/planner/core/expression_rewriter.go | 28 +++---- pkg/planner/core/fulltext_to_like.go | 73 ------------------- .../r/planner/core/fulltext_search.result | 2 +- 4 files changed, 11 insertions(+), 94 deletions(-) diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index f6b2f43055007..2f4be3aef40fa 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -211,7 +211,7 @@ func TestFTSSyntax(t *testing.T) { tk.MustQuery("select * from t where match(title) against ('hello' in boolean mode)") // Test MATCH...AGAINST with 'error' fallback mode - should fail tk.MustExec("set @@tidb_opt_fulltext_search_fallback='error'") - tk.MustContainErrMsg("select * from t where match(title) against ('hello' in boolean mode)", `This version of TiDB doesn't yet support 'MATCH...AGAINST without fulltext index'`) + tk.MustContainErrMsg("select * from t where match(title) against ('hello' in boolean mode)", `fulltext search not supported`) tk.MustExec("set @@tidb_opt_fulltext_search_fallback='like'") tk.MustContainErrMsg("select * from t where fts_match_word(title, body)", `match against a non-constant string`) tk.MustContainErrMsg("select * from t where fts_match_word(45.67, body)", `match against a non-constant string`) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 03414a43626b7..9dcc7dd7de8b7 100644 --- a/pkg/planner/core/expression_rewriter.go +++ 
b/pkg/planner/core/expression_rewriter.go @@ -2220,24 +2220,14 @@ func (er *expressionRewriter) patternLikeOrIlikeToExpression(v *ast.PatternLikeO } func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { - // Check if a fulltext index exists for the given columns - var hasIndex bool - if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.is != nil { - var err error - hasIndex, err = hasFulltextIndex(er.planCtx.builder.is, v.ColumnNames) - if err != nil { - er.err = err - return - } - } - - // If a fulltext index exists, TiDB doesn't support it yet - if hasIndex { - er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with fulltext index (native fulltext search not supported)") - return - } - - // No fulltext index exists - check fallback mode + // TODO: Check if a fulltext index exists for the given columns. + // This is currently not implemented because: + // 1. Column expressions at this point in rewriting don't have easy access to table metadata + // 2. Native fulltext search via MATCH...AGAINST is not yet supported in TiDB + // 3. Fulltext indexes in TiDB/TiFlash are used via fts_match_word() function, not MATCH...AGAINST + // When native MATCH...AGAINST support is added, we should error here if a fulltext index exists. 
+ + // Check fallback mode var fallbackMode string if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { fallbackMode = er.planCtx.builder.ctx.GetSessionVars().FulltextSearchFallback @@ -2246,7 +2236,7 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { } if fallbackMode == "error" { - er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST without fulltext index") + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST (fulltext search not supported, set tidb_opt_fulltext_search_fallback='like' to use LIKE fallback)") return } diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index d10f5b6c3e1a1..3002e70786917 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -15,12 +15,9 @@ package core import ( - "slices" "strings" "github.com/pingcap/tidb/pkg/expression" - "github.com/pingcap/tidb/pkg/infoschema" - "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/types" @@ -154,76 +151,6 @@ func parseSearchTerm(word string) searchTerm { return term } -// hasFulltextIndex checks if a fulltext index exists for the given columns -func hasFulltextIndex(is infoschema.InfoSchema, columnNames []*ast.ColumnName) (bool, error) { - if len(columnNames) == 0 { - return false, nil - } - - // All columns in a MATCH clause must be from the same table - // Get the schema and table from the first column - schema := columnNames[0].Schema - tableName := columnNames[0].Table - - // If schema is not specified, we cannot determine the table - // In this case, we'll need to check later during execution - if tableName.L == "" { - return false, nil - } - - // Get the table from info schema - tbl, err := is.TableByName(nil, schema, tableName) - if err != nil { - // Table not found, cannot check for fulltext index - 
return false, nil - } - - tblInfo := tbl.Meta() - if tblInfo == nil { - return false, nil - } - - // Extract column names from the MATCH clause - matchColumns := make([]string, len(columnNames)) - for i, col := range columnNames { - matchColumns[i] = col.Name.L // Use lowercase for case-insensitive comparison - } - // Sort once outside the loop for efficiency - slices.Sort(matchColumns) - - // Check each index to see if it's a fulltext index covering the exact columns - for _, idx := range tblInfo.Indices { - // Check if this is a fulltext index - if idx.Tp != ast.IndexTypeFulltext { - continue - } - - // Check if the index is in a usable state - if idx.State != model.StatePublic { - continue - } - - // Check if the index covers the exact set of columns - if len(idx.Columns) != len(matchColumns) { - continue - } - - // Extract index column names and sort - idxColumns := make([]string, len(idx.Columns)) - for i, col := range idx.Columns { - idxColumns[i] = col.Name.L - } - slices.Sort(idxColumns) - - // Check if the columns match (order doesn't matter for MATCH...AGAINST) - if slices.Equal(matchColumns, idxColumns) { - return true, nil - } - } - - return false, nil -} - // convertMatchAgainstToLike converts a MATCH...AGAINST expression to LIKE predicates // // This is a fallback implementation since TiDB does not natively support full-text search. 
diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index d397370c7da02..d527be5f99393 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -47,7 +47,7 @@ select id, title from articles where match(title) against(''); id title set @@tidb_opt_fulltext_search_fallback='error'; select id, title from articles where match(title) against('MySQL'); -Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST without fulltext index' +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST (fulltext search not supported, set tidb_opt_fulltext_search_fallback='like' to use LIKE fallback)' set @@tidb_opt_fulltext_search_fallback='like'; select id, title from articles where match(title) against('MySQL'); id title From 2121e81b53f9616d3108e4c867d5347c15d15a11 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 19:12:38 -0800 Subject: [PATCH 14/42] review9 --- pkg/planner/core/fulltext_to_like.go | 34 ++++++++--------------- pkg/planner/core/fulltext_to_like_test.go | 12 ++++---- 2 files changed, 17 insertions(+), 29 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 3002e70786917..64471d48625cd 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -25,11 +25,13 @@ import ( // searchTerm represents a single term in a Boolean fulltext search query type searchTerm struct { - word string - isRequired bool // Has '+' prefix - isExcluded bool // Has '-' prefix - isPrefixMatch bool // Has '*' suffix - isPhrase bool // Wrapped in quotes + word string + isRequired bool // Has '+' prefix + isExcluded bool // Has '-' prefix + isPhrase bool // Wrapped in quotes + // Note: Prefix wildcards ('*' suffix) are parsed but not used differently from regular terms + // because LIKE 
%term% already matches the term anywhere. Proper prefix matching would require + // REGEXP to enforce word-start boundaries, which we avoid for simplicity. } // parseBooleanSearchString parses a Boolean mode search string into individual terms @@ -141,9 +143,8 @@ func parseSearchTerm(word string) searchTerm { word = word[1:] } - // Check for trailing wildcard + // Check for trailing wildcard and strip it (we don't use it differently, see struct comment) if len(word) > 0 && word[len(word)-1] == '*' { - term.isPrefixMatch = true word = word[:len(word)-1] } @@ -228,7 +229,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( for _, term := range required { var termColumnPreds []expression.Expression for _, column := range columns { - pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch) + pred, err := er.buildLikePredicate(column, term.word) if err != nil { return nil, err } @@ -244,7 +245,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( for _, term := range excluded { var termColumnPreds []expression.Expression for _, column := range columns { - pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch) + pred, err := er.buildLikePredicate(column, term.word) if err != nil { return nil, err } @@ -268,7 +269,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( var allOptionalPreds []expression.Expression for _, term := range optional { for _, column := range columns { - pred, err := er.buildLikePredicate(column, term.word, false, term.isPrefixMatch) + pred, err := er.buildLikePredicate(column, term.word) if err != nil { return nil, err } @@ -304,7 +305,7 @@ func (er *expressionRewriter) convertMatchAgainstToLike( for _, column := range columns { var wordPredicates []expression.Expression for _, word := range words { - pred, err := er.buildLikePredicate(column, word, false, false) + pred, err := er.buildLikePredicate(column, word) if err != nil { return nil, err } @@ -355,8 +356,6 @@ func 
escapeLikePattern(term string) string { func (er *expressionRewriter) buildLikePredicate( column expression.Expression, term string, - isNegated bool, - _ bool, ) (expression.Expression, error) { // Escape special LIKE characters in the search term escapedTerm := escapeLikePattern(term) @@ -389,14 +388,5 @@ func (er *expressionRewriter) buildLikePredicate( return nil, err } - // Apply NOT if needed - if isNegated { - notFunc, err := er.newFunction(ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), likeFunc) - if err != nil { - return nil, err - } - return notFunc, nil - } - return likeFunc, nil } diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index 9b5b350bd4f4c..239993b81fe11 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -42,7 +42,7 @@ func TestParseBooleanSearchString(t *testing.T) { { input: "apple*", expected: []searchTerm{ - {word: "apple", isPrefixMatch: true}, + {word: "apple"}, }, }, { @@ -71,7 +71,7 @@ func TestParseBooleanSearchString(t *testing.T) { { input: "+word1* -word2", expected: []searchTerm{ - {word: "word1", isRequired: true, isPrefixMatch: true}, + {word: "word1", isRequired: true}, {word: "word2", isExcluded: true}, }, }, @@ -155,7 +155,6 @@ func TestParseBooleanSearchString(t *testing.T) { require.Equal(t, expected.word, result[i].word, "Word should match") require.Equal(t, expected.isRequired, result[i].isRequired, "isRequired should match") require.Equal(t, expected.isExcluded, result[i].isExcluded, "isExcluded should match") - require.Equal(t, expected.isPrefixMatch, result[i].isPrefixMatch, "isPrefixMatch should match") require.Equal(t, expected.isPhrase, result[i].isPhrase, "isPhrase should match") } }) @@ -177,11 +176,11 @@ func TestParseSearchTerm(t *testing.T) { }, { input: "word*", - expected: searchTerm{word: "word", isPrefixMatch: true}, + expected: searchTerm{word: "word"}, }, { input: "+word*", - expected: 
searchTerm{word: "word", isRequired: true, isPrefixMatch: true}, + expected: searchTerm{word: "word", isRequired: true}, }, { input: "word", @@ -193,7 +192,7 @@ func TestParseSearchTerm(t *testing.T) { }, { input: "+*", - expected: searchTerm{word: "", isRequired: true, isPrefixMatch: true}, + expected: searchTerm{word: "", isRequired: true}, }, } @@ -203,7 +202,6 @@ func TestParseSearchTerm(t *testing.T) { require.Equal(t, tt.expected.word, result.word, "Word should match") require.Equal(t, tt.expected.isRequired, result.isRequired, "isRequired should match") require.Equal(t, tt.expected.isExcluded, result.isExcluded, "isExcluded should match") - require.Equal(t, tt.expected.isPrefixMatch, result.isPrefixMatch, "isPrefixMatch should match") }) } } From 0b4ba840ca1024a83408fa8e38f0478563dc389d Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 18 Jan 2026 19:32:02 -0800 Subject: [PATCH 15/42] review10 --- pkg/planner/core/fulltext_to_like.go | 10 ++++----- pkg/planner/core/fulltext_to_like_test.go | 27 +++++++++++------------ 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 64471d48625cd..1f7ec7821a4b9 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -28,10 +28,10 @@ type searchTerm struct { word string isRequired bool // Has '+' prefix isExcluded bool // Has '-' prefix - isPhrase bool // Wrapped in quotes - // Note: Prefix wildcards ('*' suffix) are parsed but not used differently from regular terms - // because LIKE %term% already matches the term anywhere. Proper prefix matching would require - // REGEXP to enforce word-start boundaries, which we avoid for simplicity. + // Note: Phrases (wrapped in quotes) and prefix wildcards ('*' suffix) are parsed but not + // treated differently from regular terms because LIKE %term% already matches the term anywhere. 
+ // Proper phrase/prefix matching would require REGEXP to enforce word boundaries, which we + // avoid for simplicity. } // parseBooleanSearchString parses a Boolean mode search string into individual terms @@ -61,7 +61,6 @@ func parseBooleanSearchString(text string) []searchTerm { word: phrase, isRequired: phraseIsRequired, isExcluded: phraseIsExcluded, - isPhrase: true, }) } current.Reset() @@ -114,7 +113,6 @@ func parseBooleanSearchString(text string) []searchTerm { word: phrase, isRequired: phraseIsRequired, isExcluded: phraseIsExcluded, - isPhrase: true, }) } } else { diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index 239993b81fe11..6c8bcfd0aa823 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -48,7 +48,7 @@ func TestParseBooleanSearchString(t *testing.T) { { input: `"exact phrase"`, expected: []searchTerm{ - {word: "exact phrase", isPhrase: true}, + {word: "exact phrase"}, }, }, { @@ -57,7 +57,7 @@ func TestParseBooleanSearchString(t *testing.T) { {word: "database", isRequired: true}, {word: "mysql", isRequired: true}, {word: "oracle", isExcluded: true}, - {word: "full text", isPhrase: true}, + {word: "full text"}, }, }, { @@ -78,7 +78,7 @@ func TestParseBooleanSearchString(t *testing.T) { { input: `"unclosed quote`, expected: []searchTerm{ - {word: "unclosed quote", isPhrase: true}, + {word: "unclosed quote"}, }, }, { @@ -91,37 +91,37 @@ func TestParseBooleanSearchString(t *testing.T) { { input: `+"required phrase"`, expected: []searchTerm{ - {word: "required phrase", isRequired: true, isPhrase: true}, + {word: "required phrase", isRequired: true}, }, }, { input: `-"excluded phrase"`, expected: []searchTerm{ - {word: "excluded phrase", isExcluded: true, isPhrase: true}, + {word: "excluded phrase", isExcluded: true}, }, }, { input: `+"required phrase" optional -"excluded phrase"`, expected: []searchTerm{ - {word: "required phrase", isRequired: 
true, isPhrase: true}, + {word: "required phrase", isRequired: true}, {word: "optional"}, - {word: "excluded phrase", isExcluded: true, isPhrase: true}, + {word: "excluded phrase", isExcluded: true}, }, }, { input: `+word1 +"required phrase" -word2 -"excluded phrase"`, expected: []searchTerm{ {word: "word1", isRequired: true}, - {word: "required phrase", isRequired: true, isPhrase: true}, + {word: "required phrase", isRequired: true}, {word: "word2", isExcluded: true}, - {word: "excluded phrase", isExcluded: true, isPhrase: true}, + {word: "excluded phrase", isExcluded: true}, }, }, { input: `abc"phrase"`, expected: []searchTerm{ {word: "abc"}, - {word: "phrase", isPhrase: true}, + {word: "phrase"}, }, }, { @@ -129,20 +129,20 @@ func TestParseBooleanSearchString(t *testing.T) { expected: []searchTerm{ {word: "word1"}, {word: "abc"}, - {word: "phrase", isPhrase: true}, + {word: "phrase"}, {word: "word2"}, }, }, { input: `+"unclosed`, expected: []searchTerm{ - {word: "unclosed", isRequired: true, isPhrase: true}, + {word: "unclosed", isRequired: true}, }, }, { input: `-"unclosed phrase`, expected: []searchTerm{ - {word: "unclosed phrase", isExcluded: true, isPhrase: true}, + {word: "unclosed phrase", isExcluded: true}, }, }, } @@ -155,7 +155,6 @@ func TestParseBooleanSearchString(t *testing.T) { require.Equal(t, expected.word, result[i].word, "Word should match") require.Equal(t, expected.isRequired, result[i].isRequired, "isRequired should match") require.Equal(t, expected.isExcluded, result[i].isExcluded, "isExcluded should match") - require.Equal(t, expected.isPhrase, result[i].isPhrase, "isPhrase should match") } }) } From ab11dda25a5a6e2e1b055400b64c7ff72947b281 Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 7 Apr 2026 10:37:23 -0700 Subject: [PATCH 16/42] refactor --- pkg/expression/builtin.go | 3 +- pkg/expression/builtin_fts.go | 98 ++++++++++++++++++- .../builtin_threadsafe_generated.go | 5 + pkg/expression/distsql_builtin.go | 2 + 
pkg/expression/function_traits_test.go | 1 + pkg/expression/infer_pushdown.go | 2 +- .../integration_test/integration_test.go | 8 +- pkg/parser/ast/functions.go | 3 +- pkg/planner/core/expression_rewriter.go | 88 ++++++++++------- pkg/planner/optimize.go | 4 + pkg/sessionctx/stmtctx/stmtctx.go | 6 ++ pkg/sessionctx/vardef/tidb_vars.go | 5 - pkg/sessionctx/variable/session.go | 4 - pkg/sessionctx/variable/sysvar.go | 4 - .../r/planner/core/fulltext_search.result | 9 +- .../t/planner/core/fulltext_search.test | 21 ++-- 16 files changed, 188 insertions(+), 75 deletions(-) diff --git a/pkg/expression/builtin.go b/pkg/expression/builtin.go index 86e3bfbbca639..f167205327398 100644 --- a/pkg/expression/builtin.go +++ b/pkg/expression/builtin.go @@ -980,7 +980,8 @@ var funcs = map[string]functionClass{ ast.VecAsText: &vecAsTextFunctionClass{baseFunctionClass{ast.VecAsText, 1, 1}}, // fts functions - ast.FTSMatchWord: &ftsMatchWordFunctionClass{baseFunctionClass{ast.FTSMatchWord, 2, 2}}, + ast.FTSMatchWord: &ftsMatchWordFunctionClass{baseFunctionClass{ast.FTSMatchWord, 2, -1}}, + ast.FTSMysqlMatchAgainst: &ftsMysqlMatchAgainstFunctionClass{baseFunctionClass{ast.FTSMysqlMatchAgainst, 2, -1}}, // TiDB internal function. 
ast.TiDBDecodeKey: &tidbDecodeKeyFunctionClass{baseFunctionClass{ast.TiDBDecodeKey, 1, 1}}, diff --git a/pkg/expression/builtin_fts.go b/pkg/expression/builtin_fts.go index 15cef850f05b2..727cc99901b66 100644 --- a/pkg/expression/builtin_fts.go +++ b/pkg/expression/builtin_fts.go @@ -16,6 +16,7 @@ package expression import ( "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tipb/go-tipb" @@ -23,10 +24,12 @@ import ( var ( _ functionClass = &ftsMatchWordFunctionClass{} + _ functionClass = &ftsMysqlMatchAgainstFunctionClass{} ) var ( _ builtinFunc = &builtinFtsMatchWordSig{} + _ builtinFunc = &builtinFtsMysqlMatchAgainstSig{} ) type ftsMatchWordFunctionClass struct { @@ -37,12 +40,43 @@ type builtinFtsMatchWordSig struct { baseBuiltinFunc } +type ftsMysqlMatchAgainstFunctionClass struct { + baseFunctionClass +} + +type builtinFtsMysqlMatchAgainstSig struct { + baseBuiltinFunc + modifier ast.FulltextSearchModifier +} + func (b *builtinFtsMatchWordSig) Clone() builtinFunc { newSig := &builtinFtsMatchWordSig{} newSig.cloneFrom(&b.baseBuiltinFunc) return newSig } +func (b *builtinFtsMysqlMatchAgainstSig) Clone() builtinFunc { + newSig := &builtinFtsMysqlMatchAgainstSig{} + newSig.cloneFrom(&b.baseBuiltinFunc) + newSig.modifier = b.modifier + return newSig +} + +func (b *builtinFtsMysqlMatchAgainstSig) SetModifier(modifier ast.FulltextSearchModifier) { + b.modifier = modifier +} + +// SetFTSMysqlMatchAgainstModifier sets the modifier for the internal `MATCH ... AGAINST` builtin signature. +// It is expected to be called by planner right after building the scalar function. 
+func SetFTSMysqlMatchAgainstModifier(sf *ScalarFunction, modifier ast.FulltextSearchModifier) error { + sig, ok := sf.Function.(*builtinFtsMysqlMatchAgainstSig) + if !ok { + return errors.Errorf("unexpected builtin signature for %s: %T", ast.FTSMysqlMatchAgainst, sf.Function) + } + sig.SetModifier(modifier) + return nil +} + func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { if err := c.verifyArgs(args); err != nil { return nil, err @@ -53,8 +87,8 @@ func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Express if !ok { return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") } - if argAgainstConstant.Value.Kind() != types.KindString { - return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") + if argAgainstConstant.Value.Kind() != types.KindString && !argAgainstConstant.Value.IsNull() { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-string constant") } argsMatch := args[1:] for _, arg := range argsMatch { @@ -65,7 +99,13 @@ func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Express } argTps := make([]types.EvalType, 0, len(args)) - argTps = append(argTps, types.ETString, types.ETString) + argTps = append(argTps, types.ETString) + for _, arg := range argsMatch { + if arg.GetType(ctx.GetEvalCtx()).EvalType() != types.ETString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") + } + argTps = append(argTps, types.ETString) + } bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETReal, argTps...) if err != nil { @@ -78,6 +118,56 @@ func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Express } func (b *builtinFtsMatchWordSig) evalReal(ctx EvalContext, row chunk.Row) (float64, bool, error) { - // Reject executing match against in TiDB side. 
+ if b.args[0].(*Constant).Value.IsNull() { + return 0, false, nil + } return 0, false, errors.Errorf("cannot use 'FTS_MATCH_WORD()' outside of fulltext index") } + +func (c *ftsMysqlMatchAgainstFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { + if err := c.verifyArgs(args); err != nil { + return nil, err + } + + argAgainst := args[0] + argAgainstConstant, ok := argAgainst.(*Constant) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") + } + if argAgainstConstant.Value.Kind() != types.KindString && !argAgainstConstant.Value.IsNull() { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-string constant") + } + + argsMatch := args[1:] + for _, arg := range argsMatch { + _, ok := arg.(*Column) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("not matching a column") + } + } + + argTps := make([]types.EvalType, 0, len(args)) + argTps = append(argTps, types.ETString) + for _, arg := range argsMatch { + if arg.GetType(ctx.GetEvalCtx()).EvalType() != types.ETString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") + } + argTps = append(argTps, types.ETString) + } + + bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETReal, argTps...) + if err != nil { + return nil, err + } + + sig := &builtinFtsMysqlMatchAgainstSig{baseBuiltinFunc: bf} + sig.setPbCode(tipb.ScalarFuncSig_FTSMatchExpression) + return sig, nil +} + +func (b *builtinFtsMysqlMatchAgainstSig) evalReal(ctx EvalContext, row chunk.Row) (float64, bool, error) { + if b.args[0].(*Constant).Value.IsNull() { + return 0, false, nil + } + return 0, false, errors.Errorf("cannot use 'MATCH ... 
AGAINST' outside of fulltext index") +} diff --git a/pkg/expression/builtin_threadsafe_generated.go b/pkg/expression/builtin_threadsafe_generated.go index 5247e12e7e795..cd6f54a8ade9b 100644 --- a/pkg/expression/builtin_threadsafe_generated.go +++ b/pkg/expression/builtin_threadsafe_generated.go @@ -994,6 +994,11 @@ func (s *builtinFtsMatchWordSig) SafeToShareAcrossSession() bool { return safeToShareAcrossSession(&s.safeToShareAcrossSessionFlag, s.args) } +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinFtsMysqlMatchAgainstSig) SafeToShareAcrossSession() bool { + return safeToShareAcrossSession(&s.safeToShareAcrossSessionFlag, s.args) +} + // SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. func (s *builtinGEDecimalSig) SafeToShareAcrossSession() bool { return safeToShareAcrossSession(&s.safeToShareAcrossSessionFlag, s.args) diff --git a/pkg/expression/distsql_builtin.go b/pkg/expression/distsql_builtin.go index f942c037f1463..da379c31b33a6 100644 --- a/pkg/expression/distsql_builtin.go +++ b/pkg/expression/distsql_builtin.go @@ -1158,6 +1158,8 @@ func getSignatureByPB(ctx BuildContext, sigCode tipb.ScalarFuncSig, tp *tipb.Fie f = &builtinVecL2NormSig{base} case tipb.ScalarFuncSig_FTSMatchWord: f = &builtinFtsMatchWordSig{base} + case tipb.ScalarFuncSig_FTSMatchExpression: + f = &builtinFtsMysqlMatchAgainstSig{baseBuiltinFunc: base} default: e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode) return nil, e diff --git a/pkg/expression/function_traits_test.go b/pkg/expression/function_traits_test.go index 8864e60478829..50768d6db29d8 100644 --- a/pkg/expression/function_traits_test.go +++ b/pkg/expression/function_traits_test.go @@ -186,6 +186,7 @@ func TestIllegalFunctions4GeneratedColumns(t *testing.T) { "make_set", "makedate", "maketime", + "match_against", "md5", "microsecond", "mid", diff --git a/pkg/expression/infer_pushdown.go b/pkg/expression/infer_pushdown.go index 
6c6c35a208075..a1ca13fca32f4 100644 --- a/pkg/expression/infer_pushdown.go +++ b/pkg/expression/infer_pushdown.go @@ -450,7 +450,7 @@ func scalarExprSupportedByFlash(ctx EvalContext, function *ScalarFunction) bool return true case ast.VecDims, ast.VecL1Distance, ast.VecL2Distance, ast.VecNegativeInnerProduct, ast.VecCosineDistance, ast.VecL2Norm, ast.VecAsText: return true - case ast.FTSMatchWord: + case ast.FTSMatchWord, ast.FTSMysqlMatchAgainst: return true case ast.Grouping: // grouping function for grouping sets identification. return true diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index 3bc73c912c2cd..025418ae45686 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -207,12 +207,10 @@ func TestFTSSyntax(t *testing.T) { // tk.MustContainErrMsg("select * from t where (fts_match_word('hello', title)) > 0", "Currently 'FTS_MATCH_WORD()' must be used alone") // tk.MustContainErrMsg("select (fts_match_word('hello', title)) AS score from t where fts_match_word('hello', title)", "Currently 'FTS_MATCH_WORD()' cannot be used in SELECT fields") tk.MustContainErrMsg("select * from t where match() against ('hello')", `You have an error in your SQL syntax`) - // Test MATCH...AGAINST with default 'like' fallback mode - should succeed + // Test MATCH...AGAINST with alternative plans - LIKE fallback competes on cost + tk.MustExec("set @@tidb_opt_enable_alternative_logical_plans=ON") tk.MustQuery("select * from t where match(title) against ('hello' in boolean mode)") - // Test MATCH...AGAINST with 'error' fallback mode - should fail - tk.MustExec("set @@tidb_opt_fulltext_search_fallback='error'") - tk.MustContainErrMsg("select * from t where match(title) against ('hello' in boolean mode)", `fulltext search not supported`) - tk.MustExec("set @@tidb_opt_fulltext_search_fallback='like'") + tk.MustExec("set 
@@tidb_opt_enable_alternative_logical_plans=OFF") tk.MustContainErrMsg("select * from t where fts_match_word(title, body)", `match against a non-constant string`) tk.MustContainErrMsg("select * from t where fts_match_word(45.67, body)", `match against a non-constant string`) tk.MustContainErrMsg("select * from t where fts_match_word('hello', title, body)", `Incorrect parameter count in the call to native function`) diff --git a/pkg/parser/ast/functions.go b/pkg/parser/ast/functions.go index 7e0e453860593..949efbc4e7241 100644 --- a/pkg/parser/ast/functions.go +++ b/pkg/parser/ast/functions.go @@ -372,7 +372,8 @@ const ( VecAsText = "vec_as_text" // FTS functions (tidb extension) - FTSMatchWord = "fts_match_word" + FTSMatchWord = "fts_match_word" + FTSMysqlMatchAgainst = "match_against" // TiDB internal function. TiDBDecodeKey = "tidb_decode_key" diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 103593226fa46..456a31d2718c6 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2305,41 +2305,67 @@ func (er *expressionRewriter) patternLikeOrIlikeToExpression(v *ast.PatternLikeO } func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { - // TODO: Check if a fulltext index exists for the given columns. - // This is currently not implemented because: - // 1. Column expressions at this point in rewriting don't have easy access to table metadata - // 2. Native fulltext search via MATCH...AGAINST is not yet supported in TiDB - // 3. Fulltext indexes in TiDB/TiFlash are used via fts_match_word() function, not MATCH...AGAINST - // When native MATCH...AGAINST support is added, we should error here if a fulltext index exists. - - // Check fallback mode - var fallbackMode string + // Both the column expressions and Against expression have been visited + // and pushed onto the ctxStack. 
The stack layout is: + // [..., col1, col2, ..., colN, against] + numCols := len(v.ColumnNames) + stackLen := len(er.ctxStack) + if stackLen < numCols+1 { + er.err = errors.Errorf("Unexpected stack length for MatchAgainst: %d", stackLen) + return + } + + // When alternative logical plans are enabled, convert MATCH...AGAINST to + // LIKE predicates as a fallback that always works without TiFlash. When + // disabled, convert to the native FTSMysqlMatchAgainst builtin which can + // be pushed down to TiFlash for execution against fulltext indexes. + useLikeFallback := false if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { - fallbackMode = er.planCtx.builder.ctx.GetSessionVars().FulltextSearchFallback + sessVars := er.planCtx.builder.ctx.GetSessionVars() + useLikeFallback = sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback + } + + if useLikeFallback { + er.matchAgainstToLike(v, numCols, stackLen) } else { - fallbackMode = "like" // default + er.matchAgainstToBuiltin(v, numCols, stackLen) } +} + +// matchAgainstToBuiltin converts MATCH...AGAINST to the FTSMysqlMatchAgainst +// builtin scalar function which can be pushed down to TiFlash for execution +// against a fulltext index. +func (er *expressionRewriter) matchAgainstToBuiltin(v *ast.MatchAgainst, numCols, stackLen int) { + against := er.ctxStack[stackLen-1] + cols := er.ctxStack[stackLen-numCols-1 : stackLen-1] + + args := make([]expression.Expression, 0, 1+numCols) + args = append(args, against) + args = append(args, cols...) - if fallbackMode == "error" { - er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST (fulltext search not supported, set tidb_opt_fulltext_search_fallback='like' to use LIKE fallback)") + er.ctxStackPop(numCols + 1) + fn, err := er.newFunction(ast.FTSMysqlMatchAgainst, &v.Type, args...) 
+ if err != nil { + er.err = err return } - - // Fallback mode is "like" - convert to LIKE predicates - // Both the column expressions and Against expression have been visited - // and pushed onto the ctxStack. The stack layout is: - // [..., col1, col2, ..., colN, against] - numColumns := len(v.ColumnNames) - l := len(er.ctxStack) - if l < numColumns+1 { - er.err = errors.Errorf("MATCH...AGAINST: expected %d column expressions and Against expression on stack, got %d", numColumns+1, l) + sf, ok := fn.(*expression.ScalarFunction) + if !ok { + er.err = errors.Errorf("unexpected expression type for %s: %T", ast.FTSMysqlMatchAgainst, fn) + return + } + if err := expression.SetFTSMysqlMatchAgainstModifier(sf, v.Modifier); err != nil { + er.err = err return } + er.ctxStackAppend(fn, types.EmptyName) +} - // The Against expression is the last one on the stack - againstExpr := er.ctxStack[l-1] +// matchAgainstToLike converts MATCH...AGAINST to LIKE predicates as a +// fallback when the native FTS pushdown path is not viable. 
+func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, stackLen int) { + againstExpr := er.ctxStack[stackLen-1] - // Check if it's a constant string constExpr, ok := againstExpr.(*expression.Constant) if !ok { er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") @@ -2357,17 +2383,13 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { return } - // Get the column expressions from the stack - // They're at positions [l-numColumns-1 : l-1] - columns := make([]expression.Expression, numColumns) - for i := range numColumns { - columns[i] = er.ctxStack[l-numColumns-1+i] + columns := make([]expression.Expression, numCols) + for i := range numCols { + columns[i] = er.ctxStack[stackLen-numCols-1+i] } - // Pop all column expressions and the Against expression - er.ctxStackPop(numColumns + 1) + er.ctxStackPop(numCols + 1) - // Convert to LIKE predicates result, err := er.convertMatchAgainstToLike(columns, searchText.GetString(), v.Modifier) if err != nil { er.err = err diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index a0ab91aa20ed0..dcd2291b8974d 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -622,6 +622,10 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW if needRestoreLogicalPlanCtx { initialLogicalPlanCtx = saveLogicalPlanBuildCtx(sessVars) sessVars.StmtCtx.ResetAlternativeLogicalPlanSignals() + // Enable LIKE fallback for MATCH...AGAINST in the first round so the + // first plan is always executable. The FTS native path is explored as + // an alternative round that may win on cost when TiFlash is available. 
+ sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true } p, names, nonLogical, err := buildAndOptimizeLogicalPlanRound( diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index 4d49a14b50928..b13e14d52fe99 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -482,6 +482,11 @@ type StatementContext struct { // logical build round produced an order-aware join reorder candidate that is // worth exploring in a dedicated alternative round. AlternativeLogicalPlanOrderAwareJoinReorder bool + // AlternativeLogicalPlanFTSLikeFallback is a mode flag set before the + // first build round when alternative logical plans are enabled. When true, + // the expression rewriter converts MATCH...AGAINST to LIKE predicates + // instead of the native FTSMysqlMatchAgainst builtin. + AlternativeLogicalPlanFTSLikeFallback bool // IsExplainAnalyzeDML is true if the statement is "explain analyze DML executors", before responding the explain // results to the client, the transaction should be committed first. See issue #37373 for more details. @@ -662,6 +667,7 @@ func (sc *StatementContext) ResetAlternativeLogicalPlanSignals() { sc.AlternativeLogicalPlanDecorrelatedApply = false sc.AlternativeLogicalPlanSameOrderIndexJoin = false sc.AlternativeLogicalPlanOrderAwareJoinReorder = false + sc.AlternativeLogicalPlanFTSLikeFallback = false } // MarkAlternativeLogicalPlanDecorrelatedApply records that at least one Apply has diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index d8b8c3fa9c9c7..fbd7e5ba29995 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -346,10 +346,6 @@ const ( // TiDBOptEnableCorrelationAdjustment is used to indicates if enable correlation adjustment. TiDBOptEnableCorrelationAdjustment = "tidb_opt_enable_correlation_adjustment" - // TiDBOptFulltextSearchFallback controls the behavior when MATCH...AGAINST syntax is used. 
- // Options: 'like' (convert to LIKE predicates, default), 'error' (throw error if no fulltext index). - TiDBOptFulltextSearchFallback = "tidb_opt_fulltext_search_fallback" - // TiDBOptLimitPushDownThreshold determines if push Limit or TopN down to TiKV forcibly. TiDBOptLimitPushDownThreshold = "tidb_opt_limit_push_down_threshold" @@ -1428,7 +1424,6 @@ const ( DefOptMPPOuterJoinFixedBuildSide = false DefOptWriteRowID = false DefOptEnableCorrelationAdjustment = true - DefOptFulltextSearchFallback = "like" DefOptLimitPushDownThreshold = 5000 DefOptCorrelationThreshold = 0.9 DefOptCorrelationExpFactor = 1 diff --git a/pkg/sessionctx/variable/session.go b/pkg/sessionctx/variable/session.go index 9d6e9d44bdef8..90107e5d227e6 100644 --- a/pkg/sessionctx/variable/session.go +++ b/pkg/sessionctx/variable/session.go @@ -1068,9 +1068,6 @@ type SessionVars struct { // EnableCorrelationAdjustment is used to indicate if correlation adjustment is enabled. EnableCorrelationAdjustment bool - // FulltextSearchFallback controls the behavior when MATCH...AGAINST syntax is used. - // Options: 'like' (convert to LIKE predicates), 'error' (throw error if no fulltext index). - FulltextSearchFallback string // CorrelationExpFactor is used to control the heuristic approach of row count estimation when CorrelationThreshold is not met. 
CorrelationExpFactor int @@ -2353,7 +2350,6 @@ func NewSessionVars(hctx HookContext) *SessionVars { allowInSubqToJoinAndAgg: vardef.DefOptInSubqToJoinAndAgg, preferRangeScan: vardef.DefOptPreferRangeScan, EnableCorrelationAdjustment: vardef.DefOptEnableCorrelationAdjustment, - FulltextSearchFallback: vardef.DefOptFulltextSearchFallback, LimitPushDownThreshold: vardef.DefOptLimitPushDownThreshold, CorrelationThreshold: vardef.DefOptCorrelationThreshold, CorrelationExpFactor: vardef.DefOptCorrelationExpFactor, diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index a0f44aa6263aa..e8ffc994bb9a2 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -2243,10 +2243,6 @@ var defaultSysVars = []*SysVar{ s.EnableCorrelationAdjustment = TiDBOptOn(val) return nil }}, - {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptFulltextSearchFallback, Value: vardef.DefOptFulltextSearchFallback, Type: vardef.TypeEnum, PossibleValues: []string{"like", "error"}, SetSession: func(s *SessionVars, val string) error { - s.FulltextSearchFallback = val - return nil - }}, {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptCorrelationExpFactor, Value: strconv.Itoa(vardef.DefOptCorrelationExpFactor), Type: vardef.TypeUnsigned, MinValue: 0, MaxValue: math.MaxInt32, SetSession: func(s *SessionVars, val string) error { s.CorrelationExpFactor = int(TidbOptInt64(val, vardef.DefOptCorrelationExpFactor)) return nil diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index d527be5f99393..d6c0f343813e7 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -1,5 +1,5 @@ set tidb_cost_model_version=1; -set @@tidb_opt_fulltext_search_fallback='like'; +set @@tidb_opt_enable_alternative_logical_plans=ON; drop table if exists articles; 
create table articles (id int primary key, title varchar(200), body text); insert into articles values @@ -45,10 +45,10 @@ select id, title from articles where match(title) against('tutorial security' in id title select id, title from articles where match(title) against(''); id title -set @@tidb_opt_fulltext_search_fallback='error'; +set @@tidb_opt_enable_alternative_logical_plans=OFF; select id, title from articles where match(title) against('MySQL'); -Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST (fulltext search not supported, set tidb_opt_fulltext_search_fallback='like' to use LIKE fallback)' -set @@tidb_opt_fulltext_search_fallback='like'; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +set @@tidb_opt_enable_alternative_logical_plans=ON; select id, title from articles where match(title) against('MySQL'); id title 1 MySQL Tutorial @@ -129,4 +129,5 @@ id title select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); id title 2 How To Use MySQL Well +set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index 8ff4c7a559897..56beff825873b 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -1,8 +1,8 @@ -# Test cases for MATCH...AGAINST to LIKE conversion +# Test cases for MATCH...AGAINST to LIKE conversion via alternative logical plans # Setup set tidb_cost_model_version=1; -set @@tidb_opt_fulltext_search_fallback='like'; +set @@tidb_opt_enable_alternative_logical_plans=ON; drop table if exists articles; create table articles (id int primary key, title varchar(200), body text); insert into articles values @@ -39,13 +39,13 @@ select id, title from articles where match(title) against('tutorial security' in # Test 
9: Empty Search String select id, title from articles where match(title) against(''); -# Test 10: Test with error mode -set @@tidb_opt_fulltext_search_fallback='error'; --- error 1235 +# Test 10: Test without alternative plans (native FTS path - errors without TiFlash) +set @@tidb_opt_enable_alternative_logical_plans=OFF; +-- error 1105 select id, title from articles where match(title) against('MySQL'); -# Test 11: Switch back to like mode -set @@tidb_opt_fulltext_search_fallback='like'; +# Test 11: Switch back to alternative plans mode +set @@tidb_opt_enable_alternative_logical_plans=ON; select id, title from articles where match(title) against('MySQL'); # Test 12: Natural Language Mode with single word @@ -110,11 +110,6 @@ select id, title from articles where match(title) against('-"MySQL Tutorial"' in # Test 24: Boolean mode - mix of required/excluded phrases and words select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); -# NOTE: An additional test would verify that MATCH...AGAINST returns an error when a fulltext -# index exists (since TiDB doesn't natively support fulltext search yet). However, we cannot -# test this in the integration test environment because creating fulltext indexes requires -# TiFlash, which is not available in the test setup. The code properly checks for fulltext -# indexes and returns an error when one is found (see expression_rewriter.go:matchAgainstToExpression). - # Cleanup +set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; From 8ba9a8a97646febe276d05948b2f39d23533fff8 Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 25 Apr 2026 18:28:50 -0700 Subject: [PATCH 17/42] expression: revert fts_match_word arity/impl changes not needed for LIKE rewrite The ftsMatchWordFunctionClass changes (variable arity, new error messages, null-check in evalReal) were copied from origin/feature/fts unnecessarily. 
They broke the existing TestFTSSyntax assertion that fts_match_word with 3 args fails with "Incorrect parameter count". The LIKE rewrite feature only requires FTSMysqlMatchAgainst; FTSMatchWord is left as-is on master. Co-Authored-By: Claude Sonnet 4.6 --- pkg/expression/builtin.go | 2 +- pkg/expression/builtin_fts.go | 16 ++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/pkg/expression/builtin.go b/pkg/expression/builtin.go index f167205327398..7a2256733034a 100644 --- a/pkg/expression/builtin.go +++ b/pkg/expression/builtin.go @@ -980,7 +980,7 @@ var funcs = map[string]functionClass{ ast.VecAsText: &vecAsTextFunctionClass{baseFunctionClass{ast.VecAsText, 1, 1}}, // fts functions - ast.FTSMatchWord: &ftsMatchWordFunctionClass{baseFunctionClass{ast.FTSMatchWord, 2, -1}}, + ast.FTSMatchWord: &ftsMatchWordFunctionClass{baseFunctionClass{ast.FTSMatchWord, 2, 2}}, ast.FTSMysqlMatchAgainst: &ftsMysqlMatchAgainstFunctionClass{baseFunctionClass{ast.FTSMysqlMatchAgainst, 2, -1}}, // TiDB internal function. 
diff --git a/pkg/expression/builtin_fts.go b/pkg/expression/builtin_fts.go index 727cc99901b66..49428c16b5384 100644 --- a/pkg/expression/builtin_fts.go +++ b/pkg/expression/builtin_fts.go @@ -87,8 +87,8 @@ func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Express if !ok { return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") } - if argAgainstConstant.Value.Kind() != types.KindString && !argAgainstConstant.Value.IsNull() { - return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-string constant") + if argAgainstConstant.Value.Kind() != types.KindString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("match against a non-constant string") } argsMatch := args[1:] for _, arg := range argsMatch { @@ -99,13 +99,7 @@ func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Express } argTps := make([]types.EvalType, 0, len(args)) - argTps = append(argTps, types.ETString) - for _, arg := range argsMatch { - if arg.GetType(ctx.GetEvalCtx()).EvalType() != types.ETString { - return nil, ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") - } - argTps = append(argTps, types.ETString) - } + argTps = append(argTps, types.ETString, types.ETString) bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETReal, argTps...) if err != nil { @@ -118,9 +112,7 @@ func (c *ftsMatchWordFunctionClass) getFunction(ctx BuildContext, args []Express } func (b *builtinFtsMatchWordSig) evalReal(ctx EvalContext, row chunk.Row) (float64, bool, error) { - if b.args[0].(*Constant).Value.IsNull() { - return 0, false, nil - } + // Reject executing match against in TiDB side. 
return 0, false, errors.Errorf("cannot use 'FTS_MATCH_WORD()' outside of fulltext index") } From 19e9dc36172f20b3fd8363af85581de187eeb5fe Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 25 Apr 2026 22:38:36 -0700 Subject: [PATCH 18/42] planner, expression: fix four review findings in MATCH...AGAINST LIKE fallback P0: stmtctx: restore AlternativeLogicalPlanPreferCorrelate field that was accidentally replaced instead of added alongside FTSLikeFallback, causing a compile error in ResetAlternativeLogicalPlanSignals and MarkAlternativeLogicalPlanPreferCorrelate. P1: fulltext_to_like: all-negative boolean searches (e.g. "-a -b" with no required or optional terms) now return a constant FALSE instead of NOT(LIKE a) AND NOT(LIKE b). MySQL boolean fulltext returns an empty result set for all-negative queries; the old rewrite returned most rows. P1: expression_rewriter: call SetSkipPlanCache when performing the LIKE fallback rewrite. The rewrite bakes the search string into plan constants at build time; without this, a cached plan would reuse the first execution's LIKE patterns for subsequent executions with different search strings. P2: document in matchAgainstToExpression that the LIKE fallback applies in all expression contexts including SELECT/ORDER BY, where MySQL normally returns a float relevance score but the fallback returns 0 or 1. 
Co-Authored-By: Claude Sonnet 4.6 --- pkg/planner/core/expression_rewriter.go | 9 +++++++++ pkg/planner/core/fulltext_to_like.go | 11 +++++++++++ pkg/sessionctx/stmtctx/stmtctx.go | 4 ++++ .../r/planner/core/fulltext_search.result | 7 ------- 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 01a1c28903a50..f272ac1629b1a 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2379,6 +2379,10 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { // LIKE predicates as a fallback that always works without TiFlash. When // disabled, convert to the native FTSMysqlMatchAgainst builtin which can // be pushed down to TiFlash for execution against fulltext indexes. + // + // Limitation: the LIKE fallback applies in all expression contexts, including + // SELECT/ORDER BY scoring uses. In those contexts MySQL returns a float + // relevance score, but the fallback returns 1 (matched) or 0 (not matched). useLikeFallback := false if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { sessVars := er.planCtx.builder.ctx.GetSessionVars() @@ -2432,6 +2436,11 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s return } + // The search string is baked into LIKE pattern constants at plan-build time. + // A cached plan would reuse the first execution's patterns for all subsequent + // executions, producing wrong results. Mark the plan as non-cacheable. 
+ er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes search string into plan constants") + searchText, err := constExpr.Eval(er.sctx.GetEvalCtx(), chunk.Row{}) if err != nil { er.err = err diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 1f7ec7821a4b9..626d6741d66c5 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -217,6 +217,17 @@ func (er *expressionRewriter) convertMatchAgainstToLike( } } + // MySQL Boolean mode: a query with only excluded terms ("-a -b") returns + // an empty result set. The LIKE fallback must match this: when there are + // no required and no optional terms, no row can possibly satisfy the + // search, so return a constant FALSE immediately. + if len(required) == 0 && len(optional) == 0 && len(excluded) > 0 { + return &expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + // Build predicates with correct Boolean logic for multiple columns // In MySQL, MATCH(col1, col2) AGAINST('+word1 +word2') means: // - word1 must appear in (col1 OR col2) diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index a32f9cd579bcd..85165c4d8cecc 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -482,6 +482,10 @@ type StatementContext struct { // logical build round produced an order-aware join reorder candidate that is // worth exploring in a dedicated alternative round. AlternativeLogicalPlanOrderAwareJoinReorder bool + // AlternativeLogicalPlanPreferCorrelate indicates whether the current logical + // build round encountered a non-correlated IN subquery eligible for the + // correlate-to-Apply alternative. + AlternativeLogicalPlanPreferCorrelate bool // AlternativeLogicalPlanFTSLikeFallback is a mode flag set before the // first build round when alternative logical plans are enabled. 
When true, // the expression rewriter converts MATCH...AGAINST to LIKE predicates diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index d6c0f343813e7..3f44425af8150 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -82,9 +82,6 @@ id content drop table if exists special_chars; select id, title from articles where match(title) against('-PostgreSQL -Security' in boolean mode); id title -1 MySQL Tutorial -2 How To Use MySQL Well -3 Optimizing MySQL select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); id title select id, title from articles where match(title) against('+MySQL +tutorial @@ -122,10 +119,6 @@ id title 1 MySQL Tutorial select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); id title -2 How To Use MySQL Well -3 Optimizing MySQL -4 MySQL vs. 
PostgreSQL -5 MySQL Security select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); id title 2 How To Use MySQL Well From 569f3aa9d80d8f13db8d5988a4894d931c9df1ec Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 25 Apr 2026 22:48:19 -0700 Subject: [PATCH 19/42] rebase after months of change --- pkg/executor/windows/BUILD.bazel | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/executor/windows/BUILD.bazel b/pkg/executor/windows/BUILD.bazel index 03353e37731c3..8fd1fff825b95 100644 --- a/pkg/executor/windows/BUILD.bazel +++ b/pkg/executor/windows/BUILD.bazel @@ -34,7 +34,6 @@ go_test( flaky = True, shard_count = 7, deps = [ - ":windows", "//pkg/parser/mysql", "//pkg/testkit", ], From 19aba2cd10a822980ec52ba8a5a033de1bef8e77 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 26 Apr 2026 08:48:38 -0700 Subject: [PATCH 20/42] planner: handle optional+excluded boolean FTS terms in LIKE fallback When required==0 and both optional and excluded terms are present (e.g. "cat -dog"), the previous code ignored the optional terms entirely, returning all rows that didn't contain "dog". Now optional terms are treated as a positive filter: at least one must match, AND-ed with the NOT-exclusion predicates. Cases: required>0: optionals still ignored (required terms already anchor result) required==0, excluded==0: pure optional query, unchanged (DNF of optionals) required==0, excluded>0: optional-DNF folded into allPredicates so it is AND-ed with the excluded NOT-predicates via ComposeCNFCondition Add tests 25 and 26 covering the optional+excluded combination. 
Co-Authored-By: Claude Sonnet 4.6 --- pkg/planner/core/fulltext_to_like.go | 21 +++++++++++++------ .../r/planner/core/fulltext_search.result | 5 +++++ .../t/planner/core/fulltext_search.test | 6 ++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 626d6741d66c5..34942373530eb 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -271,10 +271,13 @@ func (er *expressionRewriter) convertMatchAgainstToLike( } } - // For optional terms: - // - If there are required/excluded terms, ignore optional terms (we can't rank in LIKE fallback) - // - If there are ONLY optional terms, require at least one to match - if len(optional) > 0 && len(required) == 0 && len(excluded) == 0 { + // For optional terms: since LIKE cannot rank, treat optionals as a + // positive filter when no required terms exist. + // - required>0: ignore optionals (required terms already anchor the result) + // - required==0, excluded==0: at least one optional must match (pure optional query) + // - required==0, excluded>0: at least one optional must match AND excluded terms + // must be absent; AND the optional-DNF into allPredicates below + if len(optional) > 0 && len(required) == 0 { var allOptionalPreds []expression.Expression for _, term := range optional { for _, column := range columns { @@ -286,8 +289,14 @@ func (er *expressionRewriter) convertMatchAgainstToLike( } } if len(allOptionalPreds) > 0 { - // When there are only optional terms, at least one must match - return expression.ComposeDNFCondition(er.sctx, allOptionalPreds...), nil + optionalDNF := expression.ComposeDNFCondition(er.sctx, allOptionalPreds...) + if len(excluded) == 0 { + // Pure optional query: return the DNF directly. + return optionalDNF, nil + } + // Optional + excluded: fold optional requirement into allPredicates + // so it is AND-ed with the NOT-exclusion predicates below. 
+ allPredicates = append(allPredicates, optionalDNF) } } diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index 3f44425af8150..75a5fb6d3d6a1 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -122,5 +122,10 @@ id title select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); id title 2 How To Use MySQL Well +select id, title from articles where match(title) against('tutorial -Security' in boolean mode); +id title +select id, title from articles where match(title) against('tutorial PostgreSQL -Security' in boolean mode); +id title +4 MySQL vs. PostgreSQL set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index 56beff825873b..d7b363b43bd13 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -110,6 +110,12 @@ select id, title from articles where match(title) against('-"MySQL Tutorial"' in # Test 24: Boolean mode - mix of required/excluded phrases and words select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); +# Test 25: Boolean mode - optional + excluded (optional treated as required filter) +select id, title from articles where match(title) against('tutorial -Security' in boolean mode); + +# Test 26: Boolean mode - optional + excluded with multiple optionals +select id, title from articles where match(title) against('tutorial PostgreSQL -Security' in boolean mode); + # Cleanup set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; From eb1d4120d631980a844937717f097c58def9455d Mon Sep 17 00:00:00 2001 From: tpp Date: 
Sun, 26 Apr 2026 15:20:59 -0700 Subject: [PATCH 21/42] planner: fix four correctness issues in MATCH...AGAINST LIKE fallback P1: nullable column produces wrong exclusion results. LIKE on a NULL column returns NULL, so NOT(NULL) = NULL silently drops rows that have a NULL column and don't contain the excluded term. Wrap every LIKE call with IFNULL(..., 0) so NULL columns are treated as not-containing-the-term (consistent with MySQL FTS semantics). P1: tokenization retains punctuation. In natural-language mode, tokens such as "MySQL," kept their punctuation after splitting and were searched literally as "%MySQL,%", missing all matches. Strip leading/trailing non-word punctuation from each natural-language token via stripTokenPunctuation. Also extend parseSearchTerm to strip MySQL relevance modifiers > and < and grouping parentheses () so ">MySQL" and "(word)" are treated as the bare optional terms "MySQL" and "word". P2: AGAINST(NULL) errored instead of returning no matches. Return a constant FALSE predicate when the evaluated search datum is NULL, consistent with the native FTS builtin which returns 0 for NULL input. Add TestParseSearchTerm cases for >, <, () and integration tests 27-29 covering punctuation stripping, relevance modifiers, and NULL AGAINST.
Co-Authored-By: Claude Sonnet 4.6 --- pkg/planner/core/expression_rewriter.go | 10 ++++ pkg/planner/core/fulltext_to_like.go | 49 ++++++++++++++++++- pkg/planner/core/fulltext_to_like_test.go | 18 +++++++ .../r/planner/core/fulltext_search.result | 16 ++++++ .../t/planner/core/fulltext_search.test | 9 ++++ 5 files changed, 101 insertions(+), 1 deletion(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index f272ac1629b1a..1ced500e428a4 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2447,6 +2447,16 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s return } + if searchText.IsNull() { + // NULL search string matches nothing, consistent with native FTS behavior. + er.ctxStackPop(numCols + 1) + er.ctxStackAppend(&expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, types.EmptyName) + return + } + if searchText.Kind() != types.KindString { er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-string search expression") return diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 34942373530eb..7dff29a46686e 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -141,6 +141,14 @@ func parseSearchTerm(word string) searchTerm { word = word[1:] } + // Strip MySQL relevance modifiers > and < (treat as optional in LIKE fallback) + if len(word) > 0 && (word[0] == '>' || word[0] == '<') { + word = word[1:] + } + + // Strip grouping parentheses that MySQL uses for sub-expression grouping + word = strings.Trim(word, "()") + // Check for trailing wildcard and strip it (we don't use it differently, see struct comment) if len(word) > 0 && word[len(word)-1] == '*' { word = word[:len(word)-1] @@ -150,6 +158,27 @@ func parseSearchTerm(word string) searchTerm { return term } +// 
stripTokenPunctuation removes leading and trailing non-word characters from a +// natural-language search token so that punctuation attached to a word by the +// tokenizer (e.g. "MySQL," → "MySQL") is not included in the LIKE pattern. +// Non-ASCII bytes (> 127) are treated as word characters so multi-byte UTF-8 +// characters pass through unchanged. +func stripTokenPunctuation(word string) string { + start := 0 + for start < len(word) && !isWordByte(word[start]) { + start++ + } + end := len(word) + for end > start && !isWordByte(word[end-1]) { + end-- + } + return word[start:end] +} + +func isWordByte(c byte) bool { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c > 127 +} + // convertMatchAgainstToLike converts a MATCH...AGAINST expression to LIKE predicates // // This is a fallback implementation since TiDB does not natively support full-text search. @@ -323,6 +352,11 @@ func (er *expressionRewriter) convertMatchAgainstToLike( for _, column := range columns { var wordPredicates []expression.Expression for _, word := range words { + // Strip leading/trailing punctuation so "MySQL," becomes "MySQL" + word = stripTokenPunctuation(word) + if word == "" { + continue + } pred, err := er.buildLikePredicate(column, word) if err != nil { return nil, err @@ -406,5 +440,18 @@ func (er *expressionRewriter) buildLikePredicate( return nil, err } - return likeFunc, nil + // Wrap with IFNULL so that a NULL column is treated as not containing the term + // (consistent with MySQL FTS semantics where NULL columns are ignored). + // Without this, NOT(NULL LIKE %term%) = NOT(NULL) = NULL which incorrectly + // filters rows that have a NULL column and don't contain the excluded term. 
+ zeroConst := &expression.Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + } + nullSafeLike, err := er.newFunction(ast.Ifnull, types.NewFieldType(mysql.TypeTiny), likeFunc, zeroConst) + if err != nil { + return nil, err + } + + return nullSafeLike, nil } diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index 6c8bcfd0aa823..e5ba7ff2e1d08 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -193,6 +193,24 @@ func TestParseSearchTerm(t *testing.T) { input: "+*", expected: searchTerm{word: "", isRequired: true}, }, + // MySQL relevance modifiers > and < are stripped; word is treated as optional + { + input: ">word", + expected: searchTerm{word: "word"}, + }, + { + input: "MySQL < treated as optional +select id, title from articles where match(title) against('>MySQL Date: Sun, 26 Apr 2026 15:42:39 -0700 Subject: [PATCH 22/42] planner/util: update null-reject builtin registry snapshot for match_against Adding ast.FTSMysqlMatchAgainst ("match_against") to the expression builtin funcs map changed the set of registered builtin function names. The snapshot hash guards against silent registry drift; update it to the new value. match_against is neither null-preserving nor unconditionally null-rejecting (it returns 0 for NULL input), so no changes to the null-reject lists. 
Co-Authored-By: Claude Sonnet 4.6 --- pkg/planner/util/null_misc_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/planner/util/null_misc_test.go b/pkg/planner/util/null_misc_test.go index 3461068806664..0e2c1eb41f741 100644 --- a/pkg/planner/util/null_misc_test.go +++ b/pkg/planner/util/null_misc_test.go @@ -38,7 +38,7 @@ func TestNullRejectBuiltinRegistrySnapshot(t *testing.T) { sum := sha256.Sum256([]byte(strings.Join(names, "\n"))) require.NotEmpty(t, names) - require.Equal(t, "a5ce0716b778fb8e0b488d3a11c402d8a8224191757a9e02ece80895d5d67e05", hex.EncodeToString(sum[:])) + require.Equal(t, "729f5252bcd91efe1a4bbf0c383a36c5a2e52ed2d90d7aab0a3e0b450322294c", hex.EncodeToString(sum[:])) for name := range nullRejectRejectNullTests { require.Contains(t, names, name) From a56628294c745c0db36052d4c656749018aaa1db Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 26 Apr 2026 17:30:59 -0700 Subject: [PATCH 23/42] expression: regenerate builtin thread-safety files for builtinFtsMysqlMatchAgainstSig Run go generate to fix CI failure. The generator correctly places builtinFtsMysqlMatchAgainstSig.SafeToShareAcrossSession in the threadunsafe file (returning false) because the struct has a modifier field without a safeToShareAcrossSessionFlag. Co-Authored-By: Claude Sonnet 4.6 --- pkg/expression/builtin_threadsafe_generated.go | 5 ----- pkg/expression/builtin_threadunsafe_generated.go | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/expression/builtin_threadsafe_generated.go b/pkg/expression/builtin_threadsafe_generated.go index cd6f54a8ade9b..5247e12e7e795 100644 --- a/pkg/expression/builtin_threadsafe_generated.go +++ b/pkg/expression/builtin_threadsafe_generated.go @@ -994,11 +994,6 @@ func (s *builtinFtsMatchWordSig) SafeToShareAcrossSession() bool { return safeToShareAcrossSession(&s.safeToShareAcrossSessionFlag, s.args) } -// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. 
-func (s *builtinFtsMysqlMatchAgainstSig) SafeToShareAcrossSession() bool { - return safeToShareAcrossSession(&s.safeToShareAcrossSessionFlag, s.args) -} - // SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. func (s *builtinGEDecimalSig) SafeToShareAcrossSession() bool { return safeToShareAcrossSession(&s.safeToShareAcrossSessionFlag, s.args) diff --git a/pkg/expression/builtin_threadunsafe_generated.go b/pkg/expression/builtin_threadunsafe_generated.go index 6f28645e4a349..2140fab768df1 100644 --- a/pkg/expression/builtin_threadunsafe_generated.go +++ b/pkg/expression/builtin_threadunsafe_generated.go @@ -81,6 +81,11 @@ func (s *builtinValidatePasswordStrengthSig) SafeToShareAcrossSession() bool { return false } +// SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. +func (s *builtinFtsMysqlMatchAgainstSig) SafeToShareAcrossSession() bool { + return false +} + // SafeToShareAcrossSession implements BuiltinFunc.SafeToShareAcrossSession. func (s *builtinIlikeSig) SafeToShareAcrossSession() bool { return false From afcf28583a481b7668875f097171915904528bd5 Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 26 Apr 2026 18:57:21 -0700 Subject: [PATCH 24/42] tests: add match_against to SHOW BUILTINS expected output The executor/show integration test records the output of SHOW BUILTINS. Adding the match_against builtin function causes it to appear in the alphabetically-sorted list between maketime and md5. 
Co-Authored-By: Claude Sonnet 4.6 --- tests/integrationtest/r/executor/show.result | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integrationtest/r/executor/show.result b/tests/integrationtest/r/executor/show.result index 69793e37958fd..625d9a6215550 100644 --- a/tests/integrationtest/r/executor/show.result +++ b/tests/integrationtest/r/executor/show.result @@ -754,6 +754,7 @@ ltrim make_set makedate maketime +match_against md5 microsecond mid From d5cfdcb53131e9d2ea69bc8080400d8bbe721efe Mon Sep 17 00:00:00 2001 From: tpp Date: Sun, 26 Apr 2026 20:48:52 -0700 Subject: [PATCH 25/42] planner, expression: fix review findings in MATCH...AGAINST LIKE fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix copyright year 2025 → 2026 in fulltext_to_like.go and _test.go - Correct misleading comment in optimize.go: FTSLikeFallback stays true for all alternative rounds (not just first); native FTS builtin path is only used when alternative logical plans are disabled - Harden evalReal: use comma-ok type assertion and return (0, true, nil) for NULL AGAINST to properly signal NULL result per MySQL semantics Co-Authored-By: Claude Sonnet 4.6 --- pkg/expression/builtin_fts.go | 6 ++++-- pkg/planner/core/fulltext_to_like.go | 2 +- pkg/planner/core/fulltext_to_like_test.go | 2 +- pkg/planner/optimize.go | 8 +++++--- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pkg/expression/builtin_fts.go b/pkg/expression/builtin_fts.go index 49428c16b5384..430b0dabb371e 100644 --- a/pkg/expression/builtin_fts.go +++ b/pkg/expression/builtin_fts.go @@ -158,8 +158,10 @@ func (c *ftsMysqlMatchAgainstFunctionClass) getFunction(ctx BuildContext, args [ } func (b *builtinFtsMysqlMatchAgainstSig) evalReal(ctx EvalContext, row chunk.Row) (float64, bool, error) { - if b.args[0].(*Constant).Value.IsNull() { - return 0, false, nil + // args[0] is validated to be a *Constant by getFunction; guard defensively + // since the sig may 
be reconstructed via the distsql path without that check. + if constArg, ok := b.args[0].(*Constant); ok && constArg.Value.IsNull() { + return 0, true, nil } return 0, false, errors.Errorf("cannot use 'MATCH ... AGAINST' outside of fulltext index") } diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 7dff29a46686e..7568b38814cb4 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -1,4 +1,4 @@ -// Copyright 2025 PingCAP, Inc. +// Copyright 2026 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index e5ba7ff2e1d08..a7c354217af65 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -1,4 +1,4 @@ -// Copyright 2025 PingCAP, Inc. +// Copyright 2026 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index 67f9ce5b8230c..e9b9ffe248e95 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -676,9 +676,11 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW if needRestoreLogicalPlanCtx { initialLogicalPlanCtx = saveLogicalPlanBuildCtx(sessVars) sessVars.StmtCtx.ResetAlternativeLogicalPlanSignals() - // Enable LIKE fallback for MATCH...AGAINST in the first round so the - // first plan is always executable. The FTS native path is explored as - // an alternative round that may win on cost when TiFlash is available. + // Enable LIKE fallback for MATCH...AGAINST whenever alternative logical + // plans are active. 
The flag is set here (before the first round) and + // stays set for any subsequent alternative rounds, so all rounds produce + // an executable LIKE-based plan. The native FTS builtin path (TiFlash) + // is used only when alternative logical plans are disabled. sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true } From 4080f425d317cd57bb93be6fb1eb3a6844591cb4 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 27 Apr 2026 05:20:08 -0700 Subject: [PATCH 26/42] planner, expression: address review findings in MATCH...AGAINST LIKE fallback Fix four issues identified in code review: 1. Handle ~ operator in Boolean mode: strip it like > and < (treat term as optional) instead of searching for literal ~term in column. 2. Error on WITH QUERY EXPANSION: the modifier requires a second FTS pass to find semantically related terms which LIKE cannot emulate. Return ErrNotSupportedYet instead of silently ignoring the modifier and producing wrong results. 3. Validate column types in LIKE path: reject non-string columns with the same error as the native builtin, instead of silently accepting them and producing unexpected LIKE results. 4. Document protobuf modifier loss: add a comment in distsql_builtin.go noting that builtinFtsMysqlMatchAgainstSig.modifier is not preserved through tipb serialization because tipb has no FTS metadata message. Also improve the convertMatchAgainstToLike doc comment to be explicit that the fallback returns 0/1 in all expression contexts (including ORDER BY MATCH...DESC and scalar SELECT), not just WHERE predicates. Add integration tests for ~ handling and QUERY EXPANSION error. 
Co-Authored-By: Claude Sonnet 4.6 --- pkg/expression/distsql_builtin.go | 5 +++++ pkg/planner/core/expression_rewriter.go | 7 ++++++- pkg/planner/core/fulltext_to_like.go | 21 +++++++++++++++---- pkg/planner/core/fulltext_to_like_test.go | 6 +++++- .../r/planner/core/fulltext_search.result | 6 ++++++ .../t/planner/core/fulltext_search.test | 7 +++++++ 6 files changed, 46 insertions(+), 6 deletions(-) diff --git a/pkg/expression/distsql_builtin.go b/pkg/expression/distsql_builtin.go index da379c31b33a6..14f82e1b9609e 100644 --- a/pkg/expression/distsql_builtin.go +++ b/pkg/expression/distsql_builtin.go @@ -1159,6 +1159,11 @@ func getSignatureByPB(ctx BuildContext, sigCode tipb.ScalarFuncSig, tp *tipb.Fie case tipb.ScalarFuncSig_FTSMatchWord: f = &builtinFtsMatchWordSig{base} case tipb.ScalarFuncSig_FTSMatchExpression: + // NOTE: builtinFtsMysqlMatchAgainstSig.modifier is not serialized in the + // protobuf encoding because the tipb schema has no FTS metadata message. + // The reconstructed sig therefore uses the zero modifier value + // (FulltextSearchModifierNaturalLanguageMode). TiFlash must derive the + // search mode from other context when executing this expression. 
f = &builtinFtsMysqlMatchAgainstSig{baseBuiltinFunc: base} default: e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 1ced500e428a4..de2775aceb2ce 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2464,7 +2464,12 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s columns := make([]expression.Expression, numCols) for i := range numCols { - columns[i] = er.ctxStack[stackLen-numCols-1+i] + col := er.ctxStack[stackLen-numCols-1+i] + if col.GetType(er.sctx.GetEvalCtx()).EvalType() != types.ETString { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") + return + } + columns[i] = col } er.ctxStackPop(numCols + 1) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 7568b38814cb4..558d350477313 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -141,8 +141,10 @@ func parseSearchTerm(word string) searchTerm { word = word[1:] } - // Strip MySQL relevance modifiers > and < (treat as optional in LIKE fallback) - if len(word) > 0 && (word[0] == '>' || word[0] == '<') { + // Strip MySQL relevance modifiers >, <, ~ (treat term as optional in LIKE fallback). + // ~ in MySQL Boolean FTS decreases the relevance of a term without excluding it; + // >, < adjust the relevance contribution. All three map to "optional" here. + if len(word) > 0 && (word[0] == '>' || word[0] == '<' || word[0] == '~') { word = word[1:] } @@ -185,7 +187,10 @@ func isWordByte(c byte) bool { // It provides basic text matching capabilities but has the following semantic differences // from MySQL's full-text search: // -// 1. No relevance scoring - returns 1 for match, 0 for no match (MySQL returns a relevance score) +// 1. 
No relevance scoring — returns 1 (match) or 0 (no match) in all expression contexts. +// Queries using MATCH...AGAINST for relevance ranking (ORDER BY MATCH(...) DESC, or +// scalar SELECT MATCH(...)) will get 0/1 integer results instead of float relevance scores. +// This is a fundamental limitation of the LIKE-based approximation. // 2. No stop word filtering - searches for all words regardless of length or commonness // 3. No word length limits - MySQL ignores words shorter than ft_min_word_len (default 4) // 4. No word boundaries - LIKE %term% matches substrings anywhere, not just complete words @@ -201,7 +206,8 @@ func isWordByte(c byte) bool { // 6. Performance - LIKE predicates cannot use full-text indexes (much slower on large datasets) // // Supported Boolean mode operators: + (required), - (excluded), * (prefix wildcard), "..." (phrase) -// Unsupported operators: ~ (negation with ranking), > < (relevance modifiers), () (grouping) +// Partially supported: ~ (treated as optional, ranking effect ignored), > < (treated as optional) +// Unsupported: WITH QUERY EXPANSION (returns an error), () sub-expression grouping (stripped) func (er *expressionRewriter) convertMatchAgainstToLike( columns []expression.Expression, searchText string, @@ -211,6 +217,13 @@ func (er *expressionRewriter) convertMatchAgainstToLike( return nil, expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with no columns") } + // WITH QUERY EXPANSION requires a second FTS pass to find semantically related + // terms; LIKE cannot approximate this. Error explicitly rather than silently + // producing wrong results. 
+ if modifier.WithQueryExpansion() { + return nil, expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback") + } + if searchText == "" { // Empty search string matches nothing return &expression.Constant{ diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index a7c354217af65..4a91cb7238467 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -193,7 +193,7 @@ func TestParseSearchTerm(t *testing.T) { input: "+*", expected: searchTerm{word: "", isRequired: true}, }, - // MySQL relevance modifiers > and < are stripped; word is treated as optional + // MySQL relevance modifiers >, <, ~ are stripped; word is treated as optional { input: ">word", expected: searchTerm{word: "word"}, @@ -202,6 +202,10 @@ func TestParseSearchTerm(t *testing.T) { input: "MySQL Date: Mon, 27 Apr 2026 05:38:06 -0700 Subject: [PATCH 27/42] planner: restrict MATCH...AGAINST LIKE rewrite to predicate contexts The LIKE fallback for MATCH...AGAINST must not apply in SELECT field list or ORDER BY clauses. In those contexts MySQL returns a float relevance score; the 0/1 LIKE result would silently corrupt queries like ORDER BY MATCH(...) DESC LIMIT n. Non-predicate contexts now always use the native FTSMysqlMatchAgainst builtin, which errors without TiFlash rather than returning semantically wrong results. The rewrite continues to apply in WHERE, HAVING, and JOIN ON clauses, where MATCH...AGAINST is used as a boolean filter and LIKE %term% correctly approximates the MySQL semantics. Tests 32 and 33 verify that SELECT MATCH and ORDER BY MATCH return error 1105 when alternative logical plans are enabled but TiFlash is not available. 
Co-Authored-By: Claude Sonnet 4.6 --- pkg/planner/core/expression_rewriter.go | 25 ++++++++++++------- pkg/sessionctx/stmtctx/stmtctx.go | 2 +- .../r/planner/core/fulltext_search.result | 4 +++ .../t/planner/core/fulltext_search.test | 8 ++++++ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index de2775aceb2ce..41eb5a62b4f6d 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2375,18 +2375,25 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { return } - // When alternative logical plans are enabled, convert MATCH...AGAINST to - // LIKE predicates as a fallback that always works without TiFlash. When - // disabled, convert to the native FTSMysqlMatchAgainst builtin which can - // be pushed down to TiFlash for execution against fulltext indexes. - // - // Limitation: the LIKE fallback applies in all expression contexts, including - // SELECT/ORDER BY scoring uses. In those contexts MySQL returns a float - // relevance score, but the fallback returns 1 (matched) or 0 (not matched). + // When alternative logical plans are enabled, AlternativeLogicalPlanFTSLikeFallback + // is set before the first build round and the expression rewriter converts + // MATCH...AGAINST to LIKE predicates — but ONLY in filter/predicate clauses + // (WHERE, HAVING, JOIN ON). In scoring contexts (SELECT field list, ORDER BY) + // the result must be a float relevance score; the 0/1 LIKE result would be + // semantically wrong and silently corrupt ORDER BY MATCH(...) DESC results. + // Those contexts always use the native FTSMysqlMatchAgainst builtin. 
useLikeFallback := false if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { sessVars := er.planCtx.builder.ctx.GetSessionVars() - useLikeFallback = sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback + if sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback { + // Only rewrite to LIKE in predicate (filter) clauses. + // SELECT field list and ORDER BY expect a float relevance score; + // the 0/1 LIKE result must not substitute it. + switch er.planCtx.builder.curClause { + case whereClause, havingClause, onClause: + useLikeFallback = true + } + } } if useLikeFallback { diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index 85165c4d8cecc..e7db6986fc06d 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -489,7 +489,7 @@ type StatementContext struct { // AlternativeLogicalPlanFTSLikeFallback is a mode flag set before the // first build round when alternative logical plans are enabled. When true, // the expression rewriter converts MATCH...AGAINST to LIKE predicates - // instead of the native FTSMysqlMatchAgainst builtin. + // (predicate contexts only) instead of the native FTSMysqlMatchAgainst builtin. 
AlternativeLogicalPlanFTSLikeFallback bool // IsExplainAnalyzeDML is true if the statement is "explain analyze DML executors", before responding the explain diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index b50957d5a3e4a..a58bef4329809 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -149,5 +149,9 @@ id title 5 MySQL Security select id, title from articles where match(title) against('MySQL' with query expansion); Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback' +select id, match(title) against('MySQL') as score from articles; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles order by match(title) against('MySQL') desc; +Error 1105 (HY000): cannot use 'MATCH ... 
AGAINST' outside of fulltext index set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index e91f5f6babdb7..0b568c3cf68e4 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -132,6 +132,14 @@ select id, title from articles where match(title) against('~Security ~PostgreSQL -- error 1235 select id, title from articles where match(title) against('MySQL' with query expansion); +# Test 32: MATCH in SELECT (scoring context) uses native FTS path, not LIKE; errors without TiFlash +-- error 1105 +select id, match(title) against('MySQL') as score from articles; + +# Test 33: MATCH in ORDER BY (scoring context) uses native FTS path, not LIKE; errors without TiFlash +-- error 1105 +select id, title from articles order by match(title) against('MySQL') desc; + # Cleanup set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; From b7733c7286b3c3658dabd478b8435c77974aff19 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 27 Apr 2026 05:53:37 -0700 Subject: [PATCH 28/42] planner: use ILIKE for case-insensitive MATCH...AGAINST LIKE fallback MySQL full-text search is always case-insensitive regardless of column collation. The LIKE fallback was using plain LIKE, which follows the column's collation (often utf8mb4_bin = case-sensitive). This caused queries like '+MySQL +tutorial' to return empty results when the data has 'MySQL Tutorial' (uppercase T). Switch from LIKE to ILIKE (case-insensitive LIKE) so the fallback correctly matches MySQL FTS semantics. For example, '+MySQL -tutorial' now correctly excludes 'MySQL Tutorial'. 
Also add integration tests for: - MATCH in HAVING clause (predicate context, LIKE fallback applies) - MATCH in JOIN ON clause (predicate context, LIKE fallback applies) - MATCH on non-string column (errors with 1235) Add comment on isWordByte noting _ is not a word character, consistent with MySQL's FTS tokenizer. Co-Authored-By: Claude Sonnet 4.6 --- pkg/planner/core/fulltext_to_like.go | 13 +++++++++---- .../r/planner/core/fulltext_search.result | 16 +++++++++++++++- .../t/planner/core/fulltext_search.test | 10 ++++++++++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 558d350477313..36335fc905c70 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -177,6 +177,9 @@ func stripTokenPunctuation(word string) string { return word[start:end] } +// isWordByte returns true for alphanumeric ASCII and non-ASCII bytes. +// Punctuation including underscore is NOT a word character, consistent with +// MySQL's built-in FTS tokenizer which treats _ as a word separator. func isWordByte(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c > 127 } @@ -202,8 +205,7 @@ func isWordByte(c byte) bool { // (MySQL FTS only matches the exact phrase with word boundaries) // This limitation exists because LIKE cannot enforce word boundaries without REGEXP // -// 5. Case sensitivity - follows column collation (MySQL full-text search is case-insensitive) -// 6. Performance - LIKE predicates cannot use full-text indexes (much slower on large datasets) +// 5. Performance - LIKE predicates cannot use full-text indexes (much slower on large datasets) // // Supported Boolean mode operators: + (required), - (excluded), * (prefix wildcard), "..." 
(phrase) // Partially supported: ~ (treated as optional, ranking effect ignored), > < (treated as optional) @@ -447,8 +449,11 @@ func (er *expressionRewriter) buildLikePredicate( RetType: types.NewFieldType(mysql.TypeTiny), } - // Build LIKE function - likeFunc, err := er.newFunction(ast.Like, types.NewFieldType(mysql.TypeTiny), column, patternConst, escapeConst) + // Build ILIKE function (case-insensitive LIKE). + // MySQL full-text search is always case-insensitive regardless of column + // collation, so ILIKE matches that semantic rather than plain LIKE which + // would follow the column's collation (often utf8mb4_bin = case-sensitive). + likeFunc, err := er.newFunction(ast.Ilike, types.NewFieldType(mysql.TypeTiny), column, patternConst, escapeConst) if err != nil { return nil, err } diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index a58bef4329809..37652f7cf8c77 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -24,9 +24,9 @@ id title 5 MySQL Security select id, title from articles where match(title) against('+MySQL +tutorial' in boolean mode); id title +1 MySQL Tutorial select id, title from articles where match(title) against('+MySQL -tutorial' in boolean mode); id title -1 MySQL Tutorial 2 How To Use MySQL Well 3 Optimizing MySQL 4 MySQL vs. 
PostgreSQL @@ -43,6 +43,8 @@ id title 5 MySQL Security select id, title from articles where match(title) against('tutorial security' in boolean mode); id title +1 MySQL Tutorial +5 MySQL Security select id, title from articles where match(title) against(''); id title set @@tidb_opt_enable_alternative_logical_plans=OFF; @@ -84,9 +86,11 @@ select id, title from articles where match(title) against('-PostgreSQL -Security id title select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); id title +1 MySQL Tutorial select id, title from articles where match(title) against('+MySQL +tutorial -Security' in boolean mode); id title +1 MySQL Tutorial select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); id title 1 MySQL Tutorial @@ -124,8 +128,10 @@ id title 2 How To Use MySQL Well select id, title from articles where match(title) against('tutorial -Security' in boolean mode); id title +1 MySQL Tutorial select id, title from articles where match(title) against('tutorial PostgreSQL -Security' in boolean mode); id title +1 MySQL Tutorial 4 MySQL vs. PostgreSQL select id, title from articles where match(title) against('MySQL, PostgreSQL.'); id title @@ -153,5 +159,13 @@ select id, match(title) against('MySQL') as score from articles; Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index select id, title from articles order by match(title) against('MySQL') desc; Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles group by id, title having match(title) against('PostgreSQL'); +id title +4 MySQL vs. 
PostgreSQL +select a.id, a.title from articles a inner join articles a2 on a.id = a2.id and match(a.title) against('Security'); +id title +5 MySQL Security +select id from articles where match(id) against('MySQL'); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index 0b568c3cf68e4..28fb1eb0d196e 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -140,6 +140,16 @@ select id, match(title) against('MySQL') as score from articles; -- error 1105 select id, title from articles order by match(title) against('MySQL') desc; +# Test 34: MATCH in HAVING clause (predicate context - LIKE fallback applies) +select id, title from articles group by id, title having match(title) against('PostgreSQL'); + +# Test 35: MATCH in JOIN ON clause (predicate context - LIKE fallback applies) +select a.id, a.title from articles a inner join articles a2 on a.id = a2.id and match(a.title) against('Security'); + +# Test 36: MATCH on non-string column errors +-- error 1235 +select id from articles where match(id) against('MySQL'); + # Cleanup set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; From e6030436c2093b30da167ca295887b8df8eaeb07 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 27 Apr 2026 05:59:02 -0700 Subject: [PATCH 29/42] planner: fix gofmt comment formatting in fulltext_to_like.go Co-Authored-By: Claude Sonnet 4.6 --- pkg/planner/core/fulltext_to_like.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 36335fc905c70..12a4360b4dbd7 100644 --- 
a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -190,18 +190,18 @@ func isWordByte(c byte) bool { // It provides basic text matching capabilities but has the following semantic differences // from MySQL's full-text search: // -// 1. No relevance scoring — returns 1 (match) or 0 (no match) in all expression contexts. -// Queries using MATCH...AGAINST for relevance ranking (ORDER BY MATCH(...) DESC, or -// scalar SELECT MATCH(...)) will get 0/1 integer results instead of float relevance scores. -// This is a fundamental limitation of the LIKE-based approximation. -// 2. No stop word filtering - searches for all words regardless of length or commonness -// 3. No word length limits - MySQL ignores words shorter than ft_min_word_len (default 4) -// 4. No word boundaries - LIKE %term% matches substrings anywhere, not just complete words -// - Simple terms: "cat" matches "concatenate", "category", "application" +// 1. No relevance scoring — returns 1 (match) or 0 (no match) in all expression contexts. +// Queries using MATCH...AGAINST for relevance ranking (ORDER BY MATCH(...) DESC, or +// scalar SELECT MATCH(...)) will get 0/1 integer results instead of float relevance scores. +// This is a fundamental limitation of the LIKE-based approximation. +// 2. No stop word filtering - searches for all words regardless of length or commonness +// 3. No word length limits - MySQL ignores words shorter than ft_min_word_len (default 4) +// 4. 
No word boundaries - LIKE %term% matches substrings anywhere, not just complete words +// - Simple terms: "cat" matches "concatenate", "category", "application" // (MySQL FTS only matches "cat" as a standalone word) -// - Prefix wildcard: "Optim*" matches "reOptimizing", "Optimizing" +// - Prefix wildcard: "Optim*" matches "reOptimizing", "Optimizing" // (MySQL FTS only matches words starting with "Optim" like "Optimizing", not "reOptimizing") -// - Phrase matching: "quick brown" matches "aquick brownie" +// - Phrase matching: "quick brown" matches "aquick brownie" // (MySQL FTS only matches the exact phrase with word boundaries) // This limitation exists because LIKE cannot enforce word boundaries without REGEXP // From 1ddbcca8d58ad18a9758c11643ac5be87cd000a0 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 27 Apr 2026 10:25:20 -0700 Subject: [PATCH 30/42] planner: add fts-native alternative round for TiFlash FTS cost competition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When tidb_opt_enable_alternative_logical_plans is ON: - First round: ILIKE predicates (always executable, the safe fallback) - Alternative "fts-native" round: native FTSMysqlMatchAgainst builtin (pushed to TiFlash, may have much lower cost) The fts-native round is only enabled when the matched columns' table has TiFlash replicas (checked via InfoSchema during the first round). Without TiFlash, the round is skipped entirely and the ILIKE plan wins by default — avoiding the problem where the native FTS function has lower estimated cost (single scalar function vs IFNULL+ILIKE) but would error at execution time. Cost comparison uses strict <, so: - With TiFlash: native FTS pushed to TiFlash → much lower cost → wins - Without TiFlash: fts-native round not triggered → ILIKE plan used New StmtCtx signal: AlternativeLogicalPlanHasFTSWithTiFlash, set when MATCH...AGAINST is encountered and the table has TiFlash replicas. 
Co-Authored-By: Claude Sonnet 4.6 --- pkg/planner/core/expression_rewriter.go | 45 ++++++++++++++++++++++--- pkg/planner/optimize.go | 17 ++++++++++ pkg/sessionctx/stmtctx/stmtctx.go | 7 ++++ 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 41eb5a62b4f6d..59f4c320b8b47 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2377,22 +2377,34 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { // When alternative logical plans are enabled, AlternativeLogicalPlanFTSLikeFallback // is set before the first build round and the expression rewriter converts - // MATCH...AGAINST to LIKE predicates — but ONLY in filter/predicate clauses + // MATCH...AGAINST to ILIKE predicates — but ONLY in filter/predicate clauses // (WHERE, HAVING, JOIN ON). In scoring contexts (SELECT field list, ORDER BY) - // the result must be a float relevance score; the 0/1 LIKE result would be + // the result must be a float relevance score; the 0/1 ILIKE result would be // semantically wrong and silently corrupt ORDER BY MATCH(...) DESC results. // Those contexts always use the native FTSMysqlMatchAgainst builtin. + // + // When this is the first (ILIKE) round and the matched columns' table has + // TiFlash replicas, the HasFTSWithTiFlash signal is set so the "fts-native" + // alternative round is triggered. That round rebuilds the plan with the native + // builtin everywhere so TiFlash FTS can compete on cost. useLikeFallback := false if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { sessVars := er.planCtx.builder.ctx.GetSessionVars() if sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback { - // Only rewrite to LIKE in predicate (filter) clauses. + // Only rewrite to ILIKE in predicate (filter) clauses. 
// SELECT field list and ORDER BY expect a float relevance score; - // the 0/1 LIKE result must not substitute it. + // the 0/1 ILIKE result must not substitute it. switch er.planCtx.builder.curClause { case whereClause, havingClause, onClause: useLikeFallback = true } + + // Check if any matched column's table has TiFlash replicas. If so, + // signal the "fts-native" alternative round to try the native FTS + // builtin pushed to TiFlash, which may win on cost. + if !sessVars.StmtCtx.AlternativeLogicalPlanHasFTSWithTiFlash { + er.checkFTSTiFlashAvailability(v, sessVars) + } } } @@ -2403,6 +2415,31 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { } } +// checkFTSTiFlashAvailability checks whether any of the matched columns' tables +// have TiFlash replicas. If so, it sets AlternativeLogicalPlanHasFTSWithTiFlash +// to trigger the "fts-native" alternative round. +func (er *expressionRewriter) checkFTSTiFlashAvailability(v *ast.MatchAgainst, sessVars *variable.SessionVars) { + builder := er.planCtx.builder + for _, col := range v.ColumnNames { + dbName := col.Schema + if dbName.L == "" { + dbName = ast.NewCIStr(sessVars.CurrentDB) + } + tblName := col.Table + if tblName.L == "" { + continue + } + tblInfo, err := builder.is.TableInfoByName(dbName, tblName) + if err != nil { + continue + } + if tblInfo.TiFlashReplica != nil && tblInfo.TiFlashReplica.Available && tblInfo.TiFlashReplica.Count > 0 { + sessVars.StmtCtx.AlternativeLogicalPlanHasFTSWithTiFlash = true + return + } + } +} + // matchAgainstToBuiltin converts MATCH...AGAINST to the FTSMysqlMatchAgainst // builtin scalar function which can be pushed down to TiFlash for execution // against a fulltext index. 
diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index e9b9ffe248e95..c54a3608f2699 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -631,6 +631,23 @@ var alternativeRounds = [...]alternativeRound{ sv.EnableCorrelateSubquery = savedEnableCorrelateSubquery }, }, + { + // fts-native: rebuild the plan using the native FTSMysqlMatchAgainst + // builtin so TiFlash FTS can compete on cost against the first round's + // ILIKE-based plan. Only enabled when the first round detected that the + // matched columns' table has TiFlash replicas — without TiFlash the native + // builtin can't be pushed down and would error at execution time. + name: "fts-native", + enabled: func(sv *variable.SessionVars) bool { + return sv.EnableAlternativeLogicalPlans && sv.StmtCtx.AlternativeLogicalPlanHasFTSWithTiFlash + }, + setup: func(sv *variable.SessionVars) { + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = false + }, + cleanup: func(sv *variable.SessionVars) { + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + }, + }, } func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW, is infoschema.InfoSchema) (base.Plan, types.NameSlice, float64, error) { diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index e7db6986fc06d..22eec39f4c939 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -491,6 +491,12 @@ type StatementContext struct { // the expression rewriter converts MATCH...AGAINST to LIKE predicates // (predicate contexts only) instead of the native FTSMysqlMatchAgainst builtin. AlternativeLogicalPlanFTSLikeFallback bool + // AlternativeLogicalPlanHasFTSWithTiFlash is set during the first (ILIKE) + // build round when a MATCH...AGAINST expression is encountered AND the + // matched columns' table has TiFlash replicas. 
This triggers the "fts-native" + // alternative round so the native FTS builtin (pushed to TiFlash) can compete + // on cost against the ILIKE plan. + AlternativeLogicalPlanHasFTSWithTiFlash bool // IsExplainAnalyzeDML is true if the statement is "explain analyze DML executors", before responding the explain // results to the client, the transaction should be committed first. See issue #37373 for more details. @@ -671,6 +677,7 @@ func (sc *StatementContext) ResetAlternativeLogicalPlanSignals() { sc.AlternativeLogicalPlanSameOrderIndexJoin = false sc.AlternativeLogicalPlanOrderAwareJoinReorder = false sc.AlternativeLogicalPlanFTSLikeFallback = false + sc.AlternativeLogicalPlanHasFTSWithTiFlash = false sc.AlternativeLogicalPlanPreferCorrelate = false } From dc2cccbe791ab26d11395f05e0efb78b32eceb70 Mon Sep 17 00:00:00 2001 From: tpp Date: Sat, 2 May 2026 21:16:05 -0700 Subject: [PATCH 31/42] review updates --- pkg/expression/BUILD.bazel | 2 + pkg/expression/fts_to_like.go | 464 ++++++++++++++++++ pkg/expression/fts_to_like_test.go | 271 ++++++++++ pkg/planner/cardinality/selectivity.go | 19 + pkg/planner/core/expression_rewriter.go | 118 ++++- pkg/planner/core/fulltext_to_like.go | 449 +---------------- pkg/planner/core/fulltext_to_like_test.go | 281 +++-------- pkg/planner/optimize.go | 20 +- .../r/planner/core/fulltext_search.result | 47 ++ .../t/planner/core/fulltext_search.test | 32 ++ 10 files changed, 1040 insertions(+), 663 deletions(-) create mode 100644 pkg/expression/fts_to_like.go create mode 100644 pkg/expression/fts_to_like_test.go diff --git a/pkg/expression/BUILD.bazel b/pkg/expression/BUILD.bazel index 3bd1edf787ac1..eb0bb0e32871d 100644 --- a/pkg/expression/BUILD.bazel +++ b/pkg/expression/BUILD.bazel @@ -65,6 +65,7 @@ go_library( "expression.go", "extension.go", "fts_helper.go", + "fts_to_like.go", "function_traits.go", "grouping_sets.go", "helper.go", @@ -199,6 +200,7 @@ go_test( "evaluator_test.go", "expr_to_pb_test.go", "expression_test.go", + 
"fts_to_like_test.go", "function_traits_test.go", "grouping_sets_test.go", "helper_test.go", diff --git a/pkg/expression/fts_to_like.go b/pkg/expression/fts_to_like.go new file mode 100644 index 0000000000000..e4cfcf4144acf --- /dev/null +++ b/pkg/expression/fts_to_like.go @@ -0,0 +1,464 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package expression + +import ( + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" +) + +// ftsSearchTerm represents a single term in a Boolean fulltext search query. +type ftsSearchTerm struct { + word string + isRequired bool // Has '+' prefix + isExcluded bool // Has '-' prefix + // Note: Phrases (wrapped in quotes) and prefix wildcards ('*' suffix) are parsed but not + // treated differently from regular terms because LIKE %term% already matches the term anywhere. + // Proper phrase/prefix matching would require REGEXP to enforce word boundaries, which we + // avoid for simplicity. +} + +// parseFTSBooleanSearchString parses a Boolean mode search string into individual terms. 
+func parseFTSBooleanSearchString(text string) []ftsSearchTerm { + var terms []ftsSearchTerm + var current strings.Builder + inQuote := false + phraseIsRequired := false + phraseIsExcluded := false + i := 0 + + for i < len(text) { + ch := text[i] + + switch ch { + case '"': + if inQuote { + // End of phrase + // NOTE: Phrase matching in MySQL full-text search finds the exact phrase as a sequence + // of words (word boundaries are enforced). Using LIKE %phrase%, we cannot perfectly + // enforce word boundaries without REGEXP. For example, "quick brown" would match + // "aquick brownie" which MySQL full-text search would not match. This is an acceptable + // limitation for a fallback implementation. + phrase := current.String() + if phrase != "" { + terms = append(terms, ftsSearchTerm{ + word: phrase, + isRequired: phraseIsRequired, + isExcluded: phraseIsExcluded, + }) + } + current.Reset() + inQuote = false + phraseIsRequired = false + phraseIsExcluded = false + } else { + // Check for leading operator before the quote (e.g., +"phrase" or -"phrase") + if current.Len() > 0 { + prefix := current.String() + // Only extract operator if prefix is exactly "+" or "-" + // Otherwise, treat it as a regular word + if prefix == "+" { + phraseIsRequired = true + } else if prefix == "-" { + phraseIsExcluded = true + } else { + // Not an operator, parse as a regular word first + terms = append(terms, parseFTSSearchTerm(prefix)) + } + current.Reset() + } + // Start of phrase + inQuote = true + } + i++ + case ' ', '\t', '\n', '\r': + if inQuote { + current.WriteByte(ch) + } else if current.Len() > 0 { + // End of word + word := current.String() + terms = append(terms, parseFTSSearchTerm(word)) + current.Reset() + } + i++ + default: + current.WriteByte(ch) + i++ + } + } + + // Handle remaining content + if current.Len() > 0 { + if inQuote { + // Unclosed quote, treat as phrase and preserve operator flags + phrase := current.String() + if phrase != "" { + terms = append(terms, 
ftsSearchTerm{ + word: phrase, + isRequired: phraseIsRequired, + isExcluded: phraseIsExcluded, + }) + } + } else { + word := current.String() + terms = append(terms, parseFTSSearchTerm(word)) + } + } + + return terms +} + +// parseFTSSearchTerm parses a single search term (not in quotes) and extracts operators. +func parseFTSSearchTerm(word string) ftsSearchTerm { + if word == "" { + return ftsSearchTerm{} + } + + term := ftsSearchTerm{word: word} + + // Check for leading operators + if word[0] == '+' { + term.isRequired = true + word = word[1:] + } else if word[0] == '-' { + term.isExcluded = true + word = word[1:] + } + + // Strip MySQL relevance modifiers >, <, ~ (treat term as optional in LIKE fallback). + // ~ in MySQL Boolean FTS decreases the relevance of a term without excluding it; + // >, < adjust the relevance contribution. All three map to "optional" here. + if len(word) > 0 && (word[0] == '>' || word[0] == '<' || word[0] == '~') { + word = word[1:] + } + + // Strip grouping parentheses that MySQL uses for sub-expression grouping + word = strings.Trim(word, "()") + + // Check for trailing wildcard and strip it (we don't use it differently, see struct comment) + if len(word) > 0 && word[len(word)-1] == '*' { + word = word[:len(word)-1] + } + + term.word = word + return term +} + +// stripFTSTokenPunctuation removes leading and trailing non-word characters from a +// natural-language search token so that punctuation attached to a word by the +// tokenizer (e.g. "MySQL," → "MySQL") is not included in the LIKE pattern. +// Non-ASCII bytes (> 127) are treated as word characters so multi-byte UTF-8 +// characters pass through unchanged. +func stripFTSTokenPunctuation(word string) string { + start := 0 + for start < len(word) && !isFTSWordByte(word[start]) { + start++ + } + end := len(word) + for end > start && !isFTSWordByte(word[end-1]) { + end-- + } + return word[start:end] +} + +// isFTSWordByte returns true for alphanumeric ASCII and non-ASCII bytes. 
+// Punctuation including underscore is NOT a word character, consistent with +// MySQL's built-in FTS tokenizer which treats _ as a word separator. +func isFTSWordByte(c byte) bool { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c > 127 +} + +// escapeFTSLikePattern escapes special LIKE characters (%, _, \) in the search term +// so they are treated as literal characters rather than wildcards. +func escapeFTSLikePattern(term string) string { + // Count special characters to pre-allocate the exact buffer size needed + escapeCount := 0 + for i := range len(term) { + ch := term[i] + if ch == '\\' || ch == '%' || ch == '_' { + escapeCount++ + } + } + + // Allocate exact size: original length + number of escape characters + var result strings.Builder + result.Grow(len(term) + escapeCount) + for i := range len(term) { + ch := term[i] + if ch == '\\' || ch == '%' || ch == '_' { + result.WriteByte('\\') + } + result.WriteByte(ch) + } + return result.String() +} + +// BuildFTSToILikeExpression converts a MATCH...AGAINST input (a list of column +// expressions, the search-string literal, and the parsed modifier) into an +// equivalent ILIKE-based predicate expression. +// +// Two callers share this conversion: +// - the planner's MATCH...AGAINST LIKE fallback rewrite, used as the +// executable plan when the "fts-native" alternative round is not viable; +// - selectivity estimation, which substitutes the same ILIKE form for the +// opaque FTSMysqlMatchAgainst builtin so the two alternative rounds +// compete on cost using the same column-stats-derived row estimate +// (the native builtin cannot be evaluated in TiDB and would otherwise +// fall through to a flat default selectivity that ignores the column's +// histogram). +// +// Returns an integer (0/1) typed expression suitable for direct use as a +// filter predicate. 
+// +// Semantic differences from MySQL's full-text search are documented in detail +// at the planner-level call site; this helper preserves those approximations +// so both callers see the same translated expression. +func BuildFTSToILikeExpression( + ctx BuildContext, + columns []Expression, + searchText string, + modifier ast.FulltextSearchModifier, +) (Expression, error) { + if len(columns) == 0 { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with no columns") + } + + // WITH QUERY EXPANSION requires a second FTS pass to find semantically related + // terms; LIKE cannot approximate this. Error explicitly rather than silently + // producing wrong results. + if modifier.WithQueryExpansion() { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback") + } + + zeroIntConst := func() Expression { + return &Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + } + } + + if searchText == "" { + return zeroIntConst(), nil + } + + if modifier.IsBooleanMode() { + terms := parseFTSBooleanSearchString(searchText) + if len(terms) == 0 { + return zeroIntConst(), nil + } + + var required, excluded, optional []ftsSearchTerm + for _, term := range terms { + if term.word == "" { + continue + } + if term.isRequired { + required = append(required, term) + } else if term.isExcluded { + excluded = append(excluded, term) + } else { + optional = append(optional, term) + } + } + + // MySQL Boolean mode: a query with only excluded terms ("-a -b") returns + // an empty result set. The LIKE fallback must match this: when there are + // no required and no optional terms, no row can possibly satisfy the + // search, so return a constant FALSE immediately. 
+ if len(required) == 0 && len(optional) == 0 && len(excluded) > 0 { + return zeroIntConst(), nil + } + + var allPredicates []Expression + + // For each required term: (col1 ILIKE %term% OR col2 ILIKE %term% ...) + for _, term := range required { + var termColumnPreds []Expression + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + termColumnPreds = append(termColumnPreds, pred) + } + if len(termColumnPreds) > 0 { + allPredicates = append(allPredicates, ComposeDNFCondition(ctx, termColumnPreds...)) + } + } + + // For each excluded term: NOT(col1 ILIKE %term% OR col2 ILIKE %term% ...) + for _, term := range excluded { + var termColumnPreds []Expression + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + termColumnPreds = append(termColumnPreds, pred) + } + if len(termColumnPreds) > 0 { + notPred, err := NewFunction(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), + ComposeDNFCondition(ctx, termColumnPreds...)) + if err != nil { + return nil, err + } + allPredicates = append(allPredicates, notPred) + } + } + + // For optional terms: since LIKE cannot rank, treat optionals as a + // positive filter when no required terms exist. 
+ // - required>0: ignore optionals (required terms already anchor the result) + // - required==0, excluded==0: at least one optional must match (pure optional query) + // - required==0, excluded>0: at least one optional must match AND excluded terms + // must be absent; AND the optional-DNF into allPredicates below + if len(optional) > 0 && len(required) == 0 { + var allOptionalPreds []Expression + for _, term := range optional { + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err + } + allOptionalPreds = append(allOptionalPreds, pred) + } + } + if len(allOptionalPreds) > 0 { + optionalDNF := ComposeDNFCondition(ctx, allOptionalPreds...) + if len(excluded) == 0 { + return optionalDNF, nil + } + allPredicates = append(allPredicates, optionalDNF) + } + } + + if len(allPredicates) == 0 { + return zeroIntConst(), nil + } + + return ComposeCNFCondition(ctx, allPredicates...), nil + } + + // Natural Language Mode: split into words and OR them together. + words := strings.Fields(searchText) + if len(words) == 0 { + return zeroIntConst(), nil + } + + var columnPredicates []Expression + for _, column := range columns { + var wordPredicates []Expression + for _, word := range words { + word = stripFTSTokenPunctuation(word) + if word == "" { + continue + } + pred, err := buildFTSILikePredicate(ctx, column, word) + if err != nil { + return nil, err + } + wordPredicates = append(wordPredicates, pred) + } + if len(wordPredicates) > 0 { + columnPredicates = append(columnPredicates, ComposeDNFCondition(ctx, wordPredicates...)) + } + } + + if len(columnPredicates) == 0 { + return zeroIntConst(), nil + } + + return ComposeDNFCondition(ctx, columnPredicates...), nil +} + +// BuildFTSToILikeExpressionFromBuiltin pulls the search string and modifier +// out of a MATCH...AGAINST scalar function (FTSMysqlMatchAgainst) and +// delegates to BuildFTSToILikeExpression. 
It is the entry point for +// selectivity estimation, where the FTS scalar function is opaque to the +// stats engine; substituting an equivalent ILIKE expression lets the engine +// reuse its TopN/histogram-based estimation paths instead of falling back +// to a flat default that ignores column statistics. +func BuildFTSToILikeExpressionFromBuiltin(ctx BuildContext, fts *ScalarFunction) (Expression, error) { + if fts == nil || fts.FuncName.L != ast.FTSMysqlMatchAgainst { + return nil, errors.Errorf("expected %s, got %v", ast.FTSMysqlMatchAgainst, fts) + } + args := fts.GetArgs() + if len(args) < 2 { + return nil, errors.Errorf("%s expects at least 2 args, got %d", ast.FTSMysqlMatchAgainst, len(args)) + } + againstConst, ok := args[0].(*Constant) + if !ok { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") + } + if againstConst.Value.IsNull() { + return &Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + }, nil + } + if againstConst.Value.Kind() != types.KindString { + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-string search constant") + } + sig, ok := fts.Function.(*builtinFtsMysqlMatchAgainstSig) + if !ok { + return nil, errors.Errorf("unexpected builtin signature for %s: %T", ast.FTSMysqlMatchAgainst, fts.Function) + } + return BuildFTSToILikeExpression(ctx, args[1:], againstConst.Value.GetString(), sig.modifier) +} + +// buildFTSILikePredicate builds a single ILIKE predicate for a column and search term, +// wrapped in IFNULL so that NULL columns are treated as not containing the term. +func buildFTSILikePredicate(ctx BuildContext, column Expression, term string) (Expression, error) { + escapedTerm := escapeFTSLikePattern(term) + + // NOTE: Prefix matching (word*) in MySQL full-text search matches words that START with + // the prefix, but the word can appear anywhere in the text. 
Using LIKE without REGEXP, + // we cannot perfectly enforce word-start boundaries. We use %term% which may produce + // false positives but avoids false negatives. + pattern := "%" + escapedTerm + "%" + + patternConst := &Constant{ + Value: types.NewStringDatum(pattern), + RetType: types.NewFieldType(mysql.TypeVarchar), + } + + // Backslash escape character (=92) for ILIKE. + escapeConst := &Constant{ + Value: types.NewIntDatum(92), + RetType: types.NewFieldType(mysql.TypeTiny), + } + + // MySQL full-text search is always case-insensitive regardless of column + // collation, so ILIKE matches that semantic rather than plain LIKE which + // would follow the column's collation. + likeFunc, err := NewFunction(ctx, ast.Ilike, types.NewFieldType(mysql.TypeTiny), column, patternConst, escapeConst) + if err != nil { + return nil, err + } + + // Wrap with IFNULL so a NULL column is treated as not containing the term + // (consistent with MySQL FTS semantics where NULL columns are ignored). + // Without this, NOT(NULL ILIKE %term%) = NOT(NULL) = NULL which incorrectly + // filters rows that have a NULL column and don't contain the excluded term. + zeroConst := &Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + } + return NewFunction(ctx, ast.Ifnull, types.NewFieldType(mysql.TypeTiny), likeFunc, zeroConst) +} diff --git a/pkg/expression/fts_to_like_test.go b/pkg/expression/fts_to_like_test.go new file mode 100644 index 0000000000000..22ed2289d482c --- /dev/null +++ b/pkg/expression/fts_to_like_test.go @@ -0,0 +1,271 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package expression + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseFTSBooleanSearchString(t *testing.T) { + tests := []struct { + input string + expected []ftsSearchTerm + }{ + { + input: "+apple +pie", + expected: []ftsSearchTerm{ + {word: "apple", isRequired: true}, + {word: "pie", isRequired: true}, + }, + }, + { + input: "+apple -cherry", + expected: []ftsSearchTerm{ + {word: "apple", isRequired: true}, + {word: "cherry", isExcluded: true}, + }, + }, + { + input: "apple*", + expected: []ftsSearchTerm{ + {word: "apple"}, + }, + }, + { + input: `"exact phrase"`, + expected: []ftsSearchTerm{ + {word: "exact phrase"}, + }, + }, + { + input: `+database +mysql -oracle "full text"`, + expected: []ftsSearchTerm{ + {word: "database", isRequired: true}, + {word: "mysql", isRequired: true}, + {word: "oracle", isExcluded: true}, + {word: "full text"}, + }, + }, + { + input: "word1 word2 word3", + expected: []ftsSearchTerm{ + {word: "word1"}, + {word: "word2"}, + {word: "word3"}, + }, + }, + { + input: "+word1* -word2", + expected: []ftsSearchTerm{ + {word: "word1", isRequired: true}, + {word: "word2", isExcluded: true}, + }, + }, + { + input: `"unclosed quote`, + expected: []ftsSearchTerm{ + {word: "unclosed quote"}, + }, + }, + { + input: "word1\t\nword2", + expected: []ftsSearchTerm{ + {word: "word1"}, + {word: "word2"}, + }, + }, + { + input: `+"required phrase"`, + expected: []ftsSearchTerm{ + {word: "required phrase", isRequired: true}, + }, + }, + { + input: `-"excluded phrase"`, + expected: []ftsSearchTerm{ + 
{word: "excluded phrase", isExcluded: true}, + }, + }, + { + input: `+"required phrase" optional -"excluded phrase"`, + expected: []ftsSearchTerm{ + {word: "required phrase", isRequired: true}, + {word: "optional"}, + {word: "excluded phrase", isExcluded: true}, + }, + }, + { + input: `+word1 +"required phrase" -word2 -"excluded phrase"`, + expected: []ftsSearchTerm{ + {word: "word1", isRequired: true}, + {word: "required phrase", isRequired: true}, + {word: "word2", isExcluded: true}, + {word: "excluded phrase", isExcluded: true}, + }, + }, + { + input: `abc"phrase"`, + expected: []ftsSearchTerm{ + {word: "abc"}, + {word: "phrase"}, + }, + }, + { + input: `word1 abc"phrase" word2`, + expected: []ftsSearchTerm{ + {word: "word1"}, + {word: "abc"}, + {word: "phrase"}, + {word: "word2"}, + }, + }, + { + input: `+"unclosed`, + expected: []ftsSearchTerm{ + {word: "unclosed", isRequired: true}, + }, + }, + { + input: `-"unclosed phrase`, + expected: []ftsSearchTerm{ + {word: "unclosed phrase", isExcluded: true}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := parseFTSBooleanSearchString(tt.input) + require.Equal(t, len(tt.expected), len(result), "Number of terms should match") + for i, expected := range tt.expected { + require.Equal(t, expected.word, result[i].word, "Word should match") + require.Equal(t, expected.isRequired, result[i].isRequired, "isRequired should match") + require.Equal(t, expected.isExcluded, result[i].isExcluded, "isExcluded should match") + } + }) + } +} + +func TestParseFTSSearchTerm(t *testing.T) { + tests := []struct { + input string + expected ftsSearchTerm + }{ + { + input: "+word", + expected: ftsSearchTerm{word: "word", isRequired: true}, + }, + { + input: "-word", + expected: ftsSearchTerm{word: "word", isExcluded: true}, + }, + { + input: "word*", + expected: ftsSearchTerm{word: "word"}, + }, + { + input: "+word*", + expected: ftsSearchTerm{word: "word", isRequired: true}, + }, + { + input: 
"word", + expected: ftsSearchTerm{word: "word"}, + }, + { + input: "", + expected: ftsSearchTerm{word: ""}, + }, + { + input: "+*", + expected: ftsSearchTerm{word: "", isRequired: true}, + }, + // MySQL relevance modifiers >, <, ~ are stripped; word is treated as optional + { + input: ">word", + expected: ftsSearchTerm{word: "word"}, + }, + { + input: " 0 { - sessVars.StmtCtx.AlternativeLogicalPlanHasFTSWithTiFlash = true + colName := name.OrigColName + if colName.L == "" { + colName = name.ColName + } + if !tableHasPublicFTSIndexOnColumn(tblInfo, colName.L) { return } } + sessVars.StmtCtx.AlternativeLogicalPlanHasFTSWithTiFlash = true +} + +// ftsModifierAllowsNativePushdown reports whether an FTS modifier can be +// safely served by the native FTSMysqlMatchAgainst builtin pushed to TiFlash. +// Today the tipb pushdown encodes only ScalarFuncSig_FTSMatchExpression and +// drops the modifier, so any non-default modifier would be executed by TiFlash +// as natural-language mode, silently producing wrong results. Only the default +// (natural-language, no query expansion) modifier is currently safe. +func ftsModifierAllowsNativePushdown(modifier ast.FulltextSearchModifier) bool { + return !modifier.IsBooleanMode() && !modifier.WithQueryExpansion() +} + +// tableHasPublicFTSIndexOnColumn reports whether tblInfo has a public FULLTEXT +// index covering the given column. TiDB's FULLTEXT index is single-column, so +// each column in MATCH(...) needs its own FTS index for the native path to be +// viable. 
+func tableHasPublicFTSIndexOnColumn(tblInfo *model.TableInfo, columnNameL string) bool { + for _, idx := range tblInfo.Indices { + if idx.FullTextInfo == nil || !idx.IsPublic() { + continue + } + if idx.FindColumnByName(columnNameL) != nil { + return true + } + } + return false } // matchAgainstToBuiltin converts MATCH...AGAINST to the FTSMysqlMatchAgainst @@ -2482,8 +2554,14 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s // The search string is baked into LIKE pattern constants at plan-build time. // A cached plan would reuse the first execution's patterns for all subsequent - // executions, producing wrong results. Mark the plan as non-cacheable. - er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes search string into plan constants") + // executions, producing wrong results when the AGAINST argument is mutable + // across executions (a `?` parameter marker or a deferred expression such as + // a user variable). For a true literal the baked pattern is stable, so the + // plan is safe to cache; only mark it non-cacheable when the constant could + // vary at execution time. 
+ if expression.MaybeOverOptimized4PlanCache(er.sctx, constExpr) { + er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes a mutable search string into plan constants") + } searchText, err := constExpr.Eval(er.sctx.GetEvalCtx(), chunk.Row{}) if err != nil { diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 12a4360b4dbd7..190624817cbe7 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -15,185 +15,30 @@ package core import ( - "strings" - "github.com/pingcap/tidb/pkg/expression" "github.com/pingcap/tidb/pkg/parser/ast" - "github.com/pingcap/tidb/pkg/parser/mysql" - "github.com/pingcap/tidb/pkg/types" ) -// searchTerm represents a single term in a Boolean fulltext search query -type searchTerm struct { - word string - isRequired bool // Has '+' prefix - isExcluded bool // Has '-' prefix - // Note: Phrases (wrapped in quotes) and prefix wildcards ('*' suffix) are parsed but not - // treated differently from regular terms because LIKE %term% already matches the term anywhere. - // Proper phrase/prefix matching would require REGEXP to enforce word boundaries, which we - // avoid for simplicity. -} - -// parseBooleanSearchString parses a Boolean mode search string into individual terms -func parseBooleanSearchString(text string) []searchTerm { - var terms []searchTerm - var current strings.Builder - inQuote := false - phraseIsRequired := false - phraseIsExcluded := false - i := 0 - - for i < len(text) { - ch := text[i] - - switch ch { - case '"': - if inQuote { - // End of phrase - // NOTE: Phrase matching in MySQL full-text search finds the exact phrase as a sequence - // of words (word boundaries are enforced). Using LIKE %phrase%, we cannot perfectly - // enforce word boundaries without REGEXP. For example, "quick brown" would match - // "aquick brownie" which MySQL full-text search would not match. This is an acceptable - // limitation for a fallback implementation. 
- phrase := current.String() - if phrase != "" { - terms = append(terms, searchTerm{ - word: phrase, - isRequired: phraseIsRequired, - isExcluded: phraseIsExcluded, - }) - } - current.Reset() - inQuote = false - phraseIsRequired = false - phraseIsExcluded = false - } else { - // Check for leading operator before the quote (e.g., +"phrase" or -"phrase") - if current.Len() > 0 { - prefix := current.String() - // Only extract operator if prefix is exactly "+" or "-" - // Otherwise, treat it as a regular word - if prefix == "+" { - phraseIsRequired = true - } else if prefix == "-" { - phraseIsExcluded = true - } else { - // Not an operator, parse as a regular word first - terms = append(terms, parseSearchTerm(prefix)) - } - current.Reset() - } - // Start of phrase - inQuote = true - } - i++ - case ' ', '\t', '\n', '\r': - if inQuote { - current.WriteByte(ch) - } else if current.Len() > 0 { - // End of word - word := current.String() - terms = append(terms, parseSearchTerm(word)) - current.Reset() - } - i++ - default: - current.WriteByte(ch) - i++ - } - } - - // Handle remaining content - if current.Len() > 0 { - if inQuote { - // Unclosed quote, treat as phrase and preserve operator flags - phrase := current.String() - if phrase != "" { - terms = append(terms, searchTerm{ - word: phrase, - isRequired: phraseIsRequired, - isExcluded: phraseIsExcluded, - }) - } - } else { - word := current.String() - terms = append(terms, parseSearchTerm(word)) - } - } - - return terms -} - -// parseSearchTerm parses a single search term (not in quotes) and extracts operators -func parseSearchTerm(word string) searchTerm { - if word == "" { - return searchTerm{} - } - - term := searchTerm{word: word} - - // Check for leading operators - if word[0] == '+' { - term.isRequired = true - word = word[1:] - } else if word[0] == '-' { - term.isExcluded = true - word = word[1:] - } - - // Strip MySQL relevance modifiers >, <, ~ (treat term as optional in LIKE fallback). 
- // ~ in MySQL Boolean FTS decreases the relevance of a term without excluding it; - // >, < adjust the relevance contribution. All three map to "optional" here. - if len(word) > 0 && (word[0] == '>' || word[0] == '<' || word[0] == '~') { - word = word[1:] - } - - // Strip grouping parentheses that MySQL uses for sub-expression grouping - word = strings.Trim(word, "()") - - // Check for trailing wildcard and strip it (we don't use it differently, see struct comment) - if len(word) > 0 && word[len(word)-1] == '*' { - word = word[:len(word)-1] - } - - term.word = word - return term -} - -// stripTokenPunctuation removes leading and trailing non-word characters from a -// natural-language search token so that punctuation attached to a word by the -// tokenizer (e.g. "MySQL," → "MySQL") is not included in the LIKE pattern. -// Non-ASCII bytes (> 127) are treated as word characters so multi-byte UTF-8 -// characters pass through unchanged. -func stripTokenPunctuation(word string) string { - start := 0 - for start < len(word) && !isWordByte(word[start]) { - start++ - } - end := len(word) - for end > start && !isWordByte(word[end-1]) { - end-- - } - return word[start:end] -} - -// isWordByte returns true for alphanumeric ASCII and non-ASCII bytes. -// Punctuation including underscore is NOT a word character, consistent with -// MySQL's built-in FTS tokenizer which treats _ as a word separator. -func isWordByte(c byte) bool { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c > 127 -} - -// convertMatchAgainstToLike converts a MATCH...AGAINST expression to LIKE predicates +// convertMatchAgainstToLike converts a MATCH...AGAINST expression to ILIKE +// predicates. 
It is a thin wrapper around expression.BuildFTSToILikeExpression; +// the conversion logic lives in pkg/expression so the same translation can be +// shared with cardinality-based selectivity estimation (which substitutes the +// equivalent ILIKE form for the opaque FTSMysqlMatchAgainst builtin). // -// This is a fallback implementation since TiDB does not natively support full-text search. -// It provides basic text matching capabilities but has the following semantic differences -// from MySQL's full-text search: +// This is a fallback rewrite since TiDB does not natively support full-text +// search outside the TiFlash FTS path. The planner only invokes it in +// predicate clauses (WHERE / HAVING / JOIN ON) — scoring contexts +// (SELECT field list, ORDER BY) keep the native FTSMysqlMatchAgainst builtin +// so the result is a float relevance score rather than 0/1, even though the +// native path then requires TiFlash at execution time. The semantic +// differences below therefore apply to predicate use only: // -// 1. No relevance scoring — returns 1 (match) or 0 (no match) in all expression contexts. -// Queries using MATCH...AGAINST for relevance ranking (ORDER BY MATCH(...) DESC, or -// scalar SELECT MATCH(...)) will get 0/1 integer results instead of float relevance scores. -// This is a fundamental limitation of the LIKE-based approximation. +// 1. No relevance scoring — the synthesized ILIKE predicate produces a 0/1 +// boolean filter result, which is the only thing a WHERE/HAVING/JOIN ON +// clause consumes. Relevance ranking (ORDER BY MATCH(...) DESC) and +// scalar SELECT MATCH(...) are intentionally NOT routed through this +// fallback for that reason; substituting 0/1 there would silently +// corrupt the sort or the projected score. // 2. No stop word filtering - searches for all words regardless of length or commonness // 3. No word length limits - MySQL ignores words shorter than ft_min_word_len (default 4) // 4. 
No word boundaries - LIKE %term% matches substrings anywhere, not just complete words @@ -215,261 +60,5 @@ func (er *expressionRewriter) convertMatchAgainstToLike( searchText string, modifier ast.FulltextSearchModifier, ) (expression.Expression, error) { - if len(columns) == 0 { - return nil, expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with no columns") - } - - // WITH QUERY EXPANSION requires a second FTS pass to find semantically related - // terms; LIKE cannot approximate this. Error explicitly rather than silently - // producing wrong results. - if modifier.WithQueryExpansion() { - return nil, expression.ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback") - } - - if searchText == "" { - // Empty search string matches nothing - return &expression.Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - }, nil - } - - var columnPredicates []expression.Expression - - if modifier.IsBooleanMode() { - // Parse Boolean mode search string - terms := parseBooleanSearchString(searchText) - if len(terms) == 0 { - return &expression.Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - }, nil - } - - // Group terms by type - var required, excluded, optional []searchTerm - for _, term := range terms { - if term.word == "" { - continue - } - if term.isRequired { - required = append(required, term) - } else if term.isExcluded { - excluded = append(excluded, term) - } else { - optional = append(optional, term) - } - } - - // MySQL Boolean mode: a query with only excluded terms ("-a -b") returns - // an empty result set. The LIKE fallback must match this: when there are - // no required and no optional terms, no row can possibly satisfy the - // search, so return a constant FALSE immediately. 
- if len(required) == 0 && len(optional) == 0 && len(excluded) > 0 { - return &expression.Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - }, nil - } - - // Build predicates with correct Boolean logic for multiple columns - // In MySQL, MATCH(col1, col2) AGAINST('+word1 +word2') means: - // - word1 must appear in (col1 OR col2) - // - word2 must appear in (col1 OR col2) - var allPredicates []expression.Expression - - // For each required term: (col1 LIKE %term% OR col2 LIKE %term%) - for _, term := range required { - var termColumnPreds []expression.Expression - for _, column := range columns { - pred, err := er.buildLikePredicate(column, term.word) - if err != nil { - return nil, err - } - termColumnPreds = append(termColumnPreds, pred) - } - // At least one column must match this required term - if len(termColumnPreds) > 0 { - allPredicates = append(allPredicates, expression.ComposeDNFCondition(er.sctx, termColumnPreds...)) - } - } - - // For each excluded term: NOT(col1 LIKE %term% OR col2 LIKE %term%) - for _, term := range excluded { - var termColumnPreds []expression.Expression - for _, column := range columns { - pred, err := er.buildLikePredicate(column, term.word) - if err != nil { - return nil, err - } - termColumnPreds = append(termColumnPreds, pred) - } - // None of the columns should match this excluded term - if len(termColumnPreds) > 0 { - notPred, err := er.newFunction(ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), - expression.ComposeDNFCondition(er.sctx, termColumnPreds...)) - if err != nil { - return nil, err - } - allPredicates = append(allPredicates, notPred) - } - } - - // For optional terms: since LIKE cannot rank, treat optionals as a - // positive filter when no required terms exist. 
- // - required>0: ignore optionals (required terms already anchor the result) - // - required==0, excluded==0: at least one optional must match (pure optional query) - // - required==0, excluded>0: at least one optional must match AND excluded terms - // must be absent; AND the optional-DNF into allPredicates below - if len(optional) > 0 && len(required) == 0 { - var allOptionalPreds []expression.Expression - for _, term := range optional { - for _, column := range columns { - pred, err := er.buildLikePredicate(column, term.word) - if err != nil { - return nil, err - } - allOptionalPreds = append(allOptionalPreds, pred) - } - } - if len(allOptionalPreds) > 0 { - optionalDNF := expression.ComposeDNFCondition(er.sctx, allOptionalPreds...) - if len(excluded) == 0 { - // Pure optional query: return the DNF directly. - return optionalDNF, nil - } - // Optional + excluded: fold optional requirement into allPredicates - // so it is AND-ed with the NOT-exclusion predicates below. - allPredicates = append(allPredicates, optionalDNF) - } - } - - // AND all required/excluded predicates together - if len(allPredicates) == 0 { - return &expression.Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - }, nil - } - - return expression.ComposeCNFCondition(er.sctx, allPredicates...), nil - } - - // Natural Language Mode: split into words and OR them together - words := strings.Fields(searchText) - if len(words) == 0 { - return &expression.Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - }, nil - } - - for _, column := range columns { - var wordPredicates []expression.Expression - for _, word := range words { - // Strip leading/trailing punctuation so "MySQL," becomes "MySQL" - word = stripTokenPunctuation(word) - if word == "" { - continue - } - pred, err := er.buildLikePredicate(column, word) - if err != nil { - return nil, err - } - wordPredicates = append(wordPredicates, pred) - } - if 
len(wordPredicates) > 0 { - columnPredicates = append(columnPredicates, expression.ComposeDNFCondition(er.sctx, wordPredicates...)) - } - } - - // OR across all columns - if len(columnPredicates) == 0 { - return &expression.Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - }, nil - } - - return expression.ComposeDNFCondition(er.sctx, columnPredicates...), nil -} - -// escapeLikePattern escapes special LIKE characters (%, _, \) in the search term -// so they are treated as literal characters rather than wildcards -func escapeLikePattern(term string) string { - // Count special characters to pre-allocate the exact buffer size needed - escapeCount := 0 - for i := range len(term) { - ch := term[i] - if ch == '\\' || ch == '%' || ch == '_' { - escapeCount++ - } - } - - // Allocate exact size: original length + number of escape characters - var result strings.Builder - result.Grow(len(term) + escapeCount) - for i := range len(term) { - ch := term[i] - if ch == '\\' || ch == '%' || ch == '_' { - result.WriteByte('\\') - } - result.WriteByte(ch) - } - return result.String() -} - -// buildLikePredicate builds a single LIKE predicate for a column and search term -func (er *expressionRewriter) buildLikePredicate( - column expression.Expression, - term string, -) (expression.Expression, error) { - // Escape special LIKE characters in the search term - escapedTerm := escapeLikePattern(term) - - // Build the pattern - // NOTE: Prefix matching (word*) in MySQL full-text search matches words that START with - // the prefix, but the word can appear anywhere in the text. For example, "Optim*" should - // match "Optimizing MySQL" but NOT "reOptimizing". Using LIKE without REGEXP, we cannot - // perfectly enforce word-start boundaries. We use %term% which may produce false positives - // (matching mid-word like "reOptimizing"), but avoids false negatives. This is an acceptable - // limitation for a fallback implementation. 
- // Both prefix and general matches use %term% to find the term anywhere in text - pattern := "%" + escapedTerm + "%" - - // Create constant for pattern - patternConst := &expression.Constant{ - Value: types.NewStringDatum(pattern), - RetType: types.NewFieldType(mysql.TypeVarchar), - } - - // Create escape constant (backslash = 92) - escapeConst := &expression.Constant{ - Value: types.NewIntDatum(92), - RetType: types.NewFieldType(mysql.TypeTiny), - } - - // Build ILIKE function (case-insensitive LIKE). - // MySQL full-text search is always case-insensitive regardless of column - // collation, so ILIKE matches that semantic rather than plain LIKE which - // would follow the column's collation (often utf8mb4_bin = case-sensitive). - likeFunc, err := er.newFunction(ast.Ilike, types.NewFieldType(mysql.TypeTiny), column, patternConst, escapeConst) - if err != nil { - return nil, err - } - - // Wrap with IFNULL so that a NULL column is treated as not containing the term - // (consistent with MySQL FTS semantics where NULL columns are ignored). - // Without this, NOT(NULL LIKE %term%) = NOT(NULL) = NULL which incorrectly - // filters rows that have a NULL column and don't contain the excluded term. 
- zeroConst := &expression.Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - } - nullSafeLike, err := er.newFunction(ast.Ifnull, types.NewFieldType(mysql.TypeTiny), likeFunc, zeroConst) - if err != nil { - return nil, err - } - - return nullSafeLike, nil + return expression.BuildFTSToILikeExpression(er.sctx, columns, searchText, modifier) } diff --git a/pkg/planner/core/fulltext_to_like_test.go b/pkg/planner/core/fulltext_to_like_test.go index 4a91cb7238467..159eccf7e9cda 100644 --- a/pkg/planner/core/fulltext_to_like_test.go +++ b/pkg/planner/core/fulltext_to_like_test.go @@ -17,255 +17,118 @@ package core import ( "testing" + "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/parser/ast" "github.com/stretchr/testify/require" ) -func TestParseBooleanSearchString(t *testing.T) { +func TestFTSModifierAllowsNativePushdown(t *testing.T) { tests := []struct { - input string - expected []searchTerm + name string + modifier ast.FulltextSearchModifier + expected bool }{ { - input: "+apple +pie", - expected: []searchTerm{ - {word: "apple", isRequired: true}, - {word: "pie", isRequired: true}, - }, - }, - { - input: "+apple -cherry", - expected: []searchTerm{ - {word: "apple", isRequired: true}, - {word: "cherry", isExcluded: true}, - }, - }, - { - input: "apple*", - expected: []searchTerm{ - {word: "apple"}, - }, - }, - { - input: `"exact phrase"`, - expected: []searchTerm{ - {word: "exact phrase"}, - }, - }, - { - input: `+database +mysql -oracle "full text"`, - expected: []searchTerm{ - {word: "database", isRequired: true}, - {word: "mysql", isRequired: true}, - {word: "oracle", isExcluded: true}, - {word: "full text"}, - }, - }, - { - input: "word1 word2 word3", - expected: []searchTerm{ - {word: "word1"}, - {word: "word2"}, - {word: "word3"}, - }, - }, - { - input: "+word1* -word2", - expected: []searchTerm{ - {word: "word1", isRequired: true}, - {word: "word2", isExcluded: true}, - }, - }, - { - input: 
`"unclosed quote`, - expected: []searchTerm{ - {word: "unclosed quote"}, - }, - }, - { - input: "word1\t\nword2", - expected: []searchTerm{ - {word: "word1"}, - {word: "word2"}, - }, - }, - { - input: `+"required phrase"`, - expected: []searchTerm{ - {word: "required phrase", isRequired: true}, - }, - }, - { - input: `-"excluded phrase"`, - expected: []searchTerm{ - {word: "excluded phrase", isExcluded: true}, - }, - }, - { - input: `+"required phrase" optional -"excluded phrase"`, - expected: []searchTerm{ - {word: "required phrase", isRequired: true}, - {word: "optional"}, - {word: "excluded phrase", isExcluded: true}, - }, - }, - { - input: `+word1 +"required phrase" -word2 -"excluded phrase"`, - expected: []searchTerm{ - {word: "word1", isRequired: true}, - {word: "required phrase", isRequired: true}, - {word: "word2", isExcluded: true}, - {word: "excluded phrase", isExcluded: true}, - }, - }, - { - input: `abc"phrase"`, - expected: []searchTerm{ - {word: "abc"}, - {word: "phrase"}, - }, - }, - { - input: `word1 abc"phrase" word2`, - expected: []searchTerm{ - {word: "word1"}, - {word: "abc"}, - {word: "phrase"}, - {word: "word2"}, - }, + name: "natural language mode (default)", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode), + expected: true, }, { - input: `+"unclosed`, - expected: []searchTerm{ - {word: "unclosed", isRequired: true}, - }, + name: "boolean mode", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode), + expected: false, }, { - input: `-"unclosed phrase`, - expected: []searchTerm{ - {word: "unclosed phrase", isExcluded: true}, - }, + name: "natural language mode with query expansion", + modifier: ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode | ast.FulltextSearchModifierWithQueryExpansion), + expected: false, }, } for _, tt := range tests { - t.Run(tt.input, func(t *testing.T) { - result := parseBooleanSearchString(tt.input) - require.Equal(t, 
len(tt.expected), len(result), "Number of terms should match") - for i, expected := range tt.expected { - require.Equal(t, expected.word, result[i].word, "Word should match") - require.Equal(t, expected.isRequired, result[i].isRequired, "isRequired should match") - require.Equal(t, expected.isExcluded, result[i].isExcluded, "isExcluded should match") - } + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.expected, ftsModifierAllowsNativePushdown(tt.modifier)) }) } } -func TestParseSearchTerm(t *testing.T) { - tests := []struct { - input string - expected searchTerm - }{ - { - input: "+word", - expected: searchTerm{word: "word", isRequired: true}, - }, - { - input: "-word", - expected: searchTerm{word: "word", isExcluded: true}, - }, - { - input: "word*", - expected: searchTerm{word: "word"}, - }, - { - input: "+word*", - expected: searchTerm{word: "word", isRequired: true}, - }, - { - input: "word", - expected: searchTerm{word: "word"}, - }, - { - input: "", - expected: searchTerm{word: ""}, - }, - { - input: "+*", - expected: searchTerm{word: "", isRequired: true}, - }, - // MySQL relevance modifiers >, <, ~ are stripped; word is treated as optional - { - input: ">word", - expected: searchTerm{word: "word"}, - }, - { - input: " Date: Mon, 11 May 2026 13:43:40 -0700 Subject: [PATCH 32/42] planner, expression: address review feedback on MATCH...AGAINST LIKE fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Substantive changes (correctness): * Restrict the LIKE rewrite to a strict supported subset (`word`, `+word`, `-word`; alphanumeric only) via the new ValidateFTSSearchStringForLikeFallback. Phrases, wildcards, `> < ~` relevance modifiers, parens, and mid-word punctuation are rejected at plan time instead of producing rows that diverge from MySQL's tokenization. Strip the now-unreachable parser branches and align unit tests with the strict subset. 
* Restrict the LIKE rewrite to direct-boolean predicate positions via the new inDirectMatchBooleanContext (modeled on canTreatInSubqueryAsExistsForFilter). Scalar positions like MATCH ... IS NULL, MATCH ... > 0.5, MATCH ... = 0, and CASE-buried MATCHes now fall through to native instead of substituting a 0/1 integer where the user expects a float relevance score. * Reject non-default modifiers (Boolean / WITH QUERY EXPANSION) on the native path at plan time. tipb pushdown drops the modifier, so pushing a Boolean-mode MATCH down would silently execute as natural-language on TiFlash. The matchHasLikeFallbackRescue helper allows native emission only when the alt-rounds driver is expected to discard the plan and rebuild via fts-like-fallback. * Collapse the two flag scheme (HasFTSWithTiFlash + HasNonViableFTSMatch) into a single AlternativeLogicalPlanFTSLikeFallback by flipping round order: round 1 is now native (matching Alt-disabled), and fts-like-fallback fires only when round 1 records a non-viable predicate-context MATCH on the planBuilder. The driver discards round 1's plan and surfaces the alt round's error when no plan is produced (lastAltRoundErr), so the user sees the LIKE round's ErrNotSupportedYet rather than the generic sentinel. * Restrict BuildFTSToILikeExpressionFromBuiltin selectivity substitution to single-column MATCH; GetSelectivityByFilter declines multi-column expressions so a multi-column substitute would fall through to the same str-match default anyway. * Reorder the non-string column check ahead of the strict-subset validator in matchAgainstToLike so MATCH(int_col) AGAINST('xx-yy') surfaces the more actionable column-type error. Style and structure: * Split BuildFTSToILikeExpression into explicit mode-dispatch helpers (buildFTSBooleanModeILikeExpression, buildFTSNaturalLanguageModeILikeExpression) with a default-clause ErrNotSupportedYet for unknown modifiers. Factor the constant-0 builder into ftsZeroIntConst. 
* Update stale "TiFlash FTS index" / "fts-native" / partial-operator references in comments and the convertMatchAgainstToLike docstring to reflect the renamed fts-like-fallback round, the strict subset, and the direct-boolean-context gating. Tests: * Unit: TestValidateFTSSearchStringForLikeFallback (33 cases), TestBuildFTSToILikeExpressionFromBuiltin (nil/wrong-fn/single/ multi-column/NULL search/non-subset rejection). * Integration: NOT MATCH (36c), parenthesized MATCH (36d), scalar positions IS NULL/>0.5/=0/CASE (36e–36h), non-default modifier in scoring/scalar/alt-off contexts (39–42). Strict-subset rejections on previously-passing edge cases now expect error 1235. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/expression/fts_to_like.go | 430 ++++++++---------- pkg/expression/fts_to_like_test.go | 315 +++++++------ pkg/planner/cardinality/selectivity.go | 18 +- pkg/planner/core/expression_rewriter.go | 217 ++++++--- pkg/planner/core/fulltext_to_like.go | 60 ++- pkg/planner/core/planbuilder.go | 23 + pkg/planner/optimize.go | 75 +-- pkg/sessionctx/stmtctx/stmtctx.go | 24 +- .../r/planner/core/fulltext_search.result | 94 ++-- .../t/planner/core/fulltext_search.test | 130 +++++- 10 files changed, 820 insertions(+), 566 deletions(-) diff --git a/pkg/expression/fts_to_like.go b/pkg/expression/fts_to_like.go index e4cfcf4144acf..9fd62451212da 100644 --- a/pkg/expression/fts_to_like.go +++ b/pkg/expression/fts_to_like.go @@ -23,163 +23,51 @@ import ( "github.com/pingcap/tidb/pkg/types" ) -// ftsSearchTerm represents a single term in a Boolean fulltext search query. +// ftsSearchTerm represents a single token in a boolean-mode FTS search string +// surviving the strict-subset validator: a plain alphanumeric word optionally +// prefixed with `+` (required) or `-` (excluded). 
type ftsSearchTerm struct { word string - isRequired bool // Has '+' prefix - isExcluded bool // Has '-' prefix - // Note: Phrases (wrapped in quotes) and prefix wildcards ('*' suffix) are parsed but not - // treated differently from regular terms because LIKE %term% already matches the term anywhere. - // Proper phrase/prefix matching would require REGEXP to enforce word boundaries, which we - // avoid for simplicity. + isRequired bool + isExcluded bool } -// parseFTSBooleanSearchString parses a Boolean mode search string into individual terms. +// parseFTSBooleanSearchString splits a boolean-mode search string into terms. +// Inputs reach this function only after ValidateFTSSearchStringForLikeFallback +// has accepted them, so every whitespace-separated field is either a bare +// alphanumeric word or `+word`/`-word`. func parseFTSBooleanSearchString(text string) []ftsSearchTerm { - var terms []ftsSearchTerm - var current strings.Builder - inQuote := false - phraseIsRequired := false - phraseIsExcluded := false - i := 0 - - for i < len(text) { - ch := text[i] - - switch ch { - case '"': - if inQuote { - // End of phrase - // NOTE: Phrase matching in MySQL full-text search finds the exact phrase as a sequence - // of words (word boundaries are enforced). Using LIKE %phrase%, we cannot perfectly - // enforce word boundaries without REGEXP. For example, "quick brown" would match - // "aquick brownie" which MySQL full-text search would not match. This is an acceptable - // limitation for a fallback implementation. 
- phrase := current.String() - if phrase != "" { - terms = append(terms, ftsSearchTerm{ - word: phrase, - isRequired: phraseIsRequired, - isExcluded: phraseIsExcluded, - }) - } - current.Reset() - inQuote = false - phraseIsRequired = false - phraseIsExcluded = false - } else { - // Check for leading operator before the quote (e.g., +"phrase" or -"phrase") - if current.Len() > 0 { - prefix := current.String() - // Only extract operator if prefix is exactly "+" or "-" - // Otherwise, treat it as a regular word - if prefix == "+" { - phraseIsRequired = true - } else if prefix == "-" { - phraseIsExcluded = true - } else { - // Not an operator, parse as a regular word first - terms = append(terms, parseFTSSearchTerm(prefix)) - } - current.Reset() - } - // Start of phrase - inQuote = true - } - i++ - case ' ', '\t', '\n', '\r': - if inQuote { - current.WriteByte(ch) - } else if current.Len() > 0 { - // End of word - word := current.String() - terms = append(terms, parseFTSSearchTerm(word)) - current.Reset() - } - i++ - default: - current.WriteByte(ch) - i++ - } + fields := strings.Fields(text) + if len(fields) == 0 { + return nil } - - // Handle remaining content - if current.Len() > 0 { - if inQuote { - // Unclosed quote, treat as phrase and preserve operator flags - phrase := current.String() - if phrase != "" { - terms = append(terms, ftsSearchTerm{ - word: phrase, - isRequired: phraseIsRequired, - isExcluded: phraseIsExcluded, - }) - } - } else { - word := current.String() - terms = append(terms, parseFTSSearchTerm(word)) - } + terms := make([]ftsSearchTerm, 0, len(fields)) + for _, w := range fields { + terms = append(terms, parseFTSSearchTerm(w)) } - return terms } -// parseFTSSearchTerm parses a single search term (not in quotes) and extracts operators. +// parseFTSSearchTerm parses a single boolean-mode token. The strict-subset +// validator guarantees `word`, `+word`, or `-word` with an alphanumeric body, +// so only the leading operator needs interpretation. 
func parseFTSSearchTerm(word string) ftsSearchTerm { if word == "" { return ftsSearchTerm{} } - - term := ftsSearchTerm{word: word} - - // Check for leading operators - if word[0] == '+' { - term.isRequired = true - word = word[1:] - } else if word[0] == '-' { - term.isExcluded = true - word = word[1:] - } - - // Strip MySQL relevance modifiers >, <, ~ (treat term as optional in LIKE fallback). - // ~ in MySQL Boolean FTS decreases the relevance of a term without excluding it; - // >, < adjust the relevance contribution. All three map to "optional" here. - if len(word) > 0 && (word[0] == '>' || word[0] == '<' || word[0] == '~') { - word = word[1:] - } - - // Strip grouping parentheses that MySQL uses for sub-expression grouping - word = strings.Trim(word, "()") - - // Check for trailing wildcard and strip it (we don't use it differently, see struct comment) - if len(word) > 0 && word[len(word)-1] == '*' { - word = word[:len(word)-1] + switch word[0] { + case '+': + return ftsSearchTerm{word: word[1:], isRequired: true} + case '-': + return ftsSearchTerm{word: word[1:], isExcluded: true} } - - term.word = word - return term -} - -// stripFTSTokenPunctuation removes leading and trailing non-word characters from a -// natural-language search token so that punctuation attached to a word by the -// tokenizer (e.g. "MySQL," → "MySQL") is not included in the LIKE pattern. -// Non-ASCII bytes (> 127) are treated as word characters so multi-byte UTF-8 -// characters pass through unchanged. -func stripFTSTokenPunctuation(word string) string { - start := 0 - for start < len(word) && !isFTSWordByte(word[start]) { - start++ - } - end := len(word) - for end > start && !isFTSWordByte(word[end-1]) { - end-- - } - return word[start:end] + return ftsSearchTerm{word: word} } // isFTSWordByte returns true for alphanumeric ASCII and non-ASCII bytes. 
// Punctuation including underscore is NOT a word character, consistent with -// MySQL's built-in FTS tokenizer which treats _ as a word separator. +// MySQL's built-in FTS tokenizer which treats _ as a word separator. Used by +// ValidateFTSSearchStringForLikeFallback to gate the LIKE rewrite. func isFTSWordByte(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c > 127 } @@ -209,19 +97,60 @@ func escapeFTSLikePattern(term string) string { return result.String() } +// ValidateFTSSearchStringForLikeFallback reports whether searchText falls +// inside the strict subset that the LIKE fallback is allowed to translate. +// The supported subset is, by mode: +// +// - Boolean mode: each whitespace-separated token must be `word`, `+word`, +// or `-word`, where `word` consists of ASCII alphanumeric characters or +// non-ASCII UTF-8 bytes (the same definition used by isFTSWordByte). +// - Natural-language mode: each whitespace-separated token must be a `word` +// of the same alphanumeric form (no leading +/- operators). +// +// An empty or whitespace-only search string is valid; BuildFTSToILikeExpression +// short-circuits to a constant-0 result for it. +// +// Anything outside this subset (phrases, * prefix, > < ~ relevance modifiers, +// () grouping, mid-word punctuation like `xx-yy`, etc.) is rejected because +// MySQL FTS tokenizes those constructs in ways that differ from a substring +// LIKE match. The planner uses this signal to skip the LIKE fallback for +// rejected strings; the native FTSMysqlMatchAgainst builtin can still serve +// the query when an FTS index is available. 
+func ValidateFTSSearchStringForLikeFallback(searchText string, modifier ast.FulltextSearchModifier) error { + isBoolean := modifier.IsBooleanMode() + for _, token := range strings.Fields(searchText) { + body := token + if isBoolean && (body[0] == '+' || body[0] == '-') { + body = body[1:] + } + if body == "" { + return ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST search term '" + token + "' is not supported in the LIKE fallback") + } + for i := range len(body) { + if !isFTSWordByte(body[i]) { + return ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST search term '" + token + "' is not supported in the LIKE fallback") + } + } + } + return nil +} + // BuildFTSToILikeExpression converts a MATCH...AGAINST input (a list of column // expressions, the search-string literal, and the parsed modifier) into an // equivalent ILIKE-based predicate expression. // // Two callers share this conversion: -// - the planner's MATCH...AGAINST LIKE fallback rewrite, used as the -// executable plan when the "fts-native" alternative round is not viable; +// - the planner's MATCH...AGAINST LIKE fallback rewrite, used by the +// "fts-like-fallback" alternative round when round 1 reports that the +// native FTSMysqlMatchAgainst builtin cannot serve a predicate-context +// MATCH (no FTS index on a TiFlash replica, modifier not pushdown-supported); // - selectivity estimation, which substitutes the same ILIKE form for the -// opaque FTSMysqlMatchAgainst builtin so the two alternative rounds -// compete on cost using the same column-stats-derived row estimate -// (the native builtin cannot be evaluated in TiDB and would otherwise -// fall through to a flat default selectivity that ignores the column's -// histogram). 
+// opaque FTSMysqlMatchAgainst builtin so round 1's cost is computed from +// column statistics rather than a flat default — the native builtin +// cannot be evaluated in TiDB and would otherwise fall through to a +// SelectivityFactor (0.8) that ignores the column's histogram. // // Returns an integer (0/1) typed expression suitable for direct use as a // filter predicate. @@ -246,129 +175,153 @@ func BuildFTSToILikeExpression( return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback") } - zeroIntConst := func() Expression { - return &Constant{ - Value: types.NewIntDatum(0), - RetType: types.NewFieldType(mysql.TypeTiny), - } + // Reject search strings outside the strict supported subset before we + // translate. Callers that want a graceful fallback (e.g. the planner + // redirecting to the native builtin, or selectivity estimation falling + // through to a default estimate) should call this validator directly and + // react to its error. + if err := ValidateFTSSearchStringForLikeFallback(searchText, modifier); err != nil { + return nil, err } if searchText == "" { - return zeroIntConst(), nil + return ftsZeroIntConst(), nil } if modifier.IsBooleanMode() { - terms := parseFTSBooleanSearchString(searchText) - if len(terms) == 0 { - return zeroIntConst(), nil + return buildFTSBooleanModeILikeExpression(ctx, columns, searchText) + } + if modifier.IsNaturalLanguageMode() { + return buildFTSNaturalLanguageModeILikeExpression(ctx, columns, searchText) + } + return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST modifier is not supported in the LIKE fallback") +} + +// ftsZeroIntConst returns the constant-0 tiny-int expression used whenever +// the LIKE fallback can prove no row will match (empty search string, all +// terms tokenized away, or boolean-mode "only excluded" queries). 
+func ftsZeroIntConst() Expression { + return &Constant{ + Value: types.NewIntDatum(0), + RetType: types.NewFieldType(mysql.TypeTiny), + } +} + +// buildFTSBooleanModeILikeExpression handles `IN BOOLEAN MODE`. Required +// terms become an AND of per-term column-DNFs, excluded terms become NOT over +// per-term column-DNFs, and optional terms anchor the result only when no +// required terms exist (since LIKE cannot rank). +func buildFTSBooleanModeILikeExpression(ctx BuildContext, columns []Expression, searchText string) (Expression, error) { + terms := parseFTSBooleanSearchString(searchText) + if len(terms) == 0 { + return ftsZeroIntConst(), nil + } + + var required, excluded, optional []ftsSearchTerm + for _, term := range terms { + if term.word == "" { + continue } + if term.isRequired { + required = append(required, term) + } else if term.isExcluded { + excluded = append(excluded, term) + } else { + optional = append(optional, term) + } + } - var required, excluded, optional []ftsSearchTerm - for _, term := range terms { - if term.word == "" { - continue - } - if term.isRequired { - required = append(required, term) - } else if term.isExcluded { - excluded = append(excluded, term) - } else { - optional = append(optional, term) + // MySQL Boolean mode: a query with only excluded terms ("-a -b") returns + // an empty result set. The LIKE fallback must match this: when there are + // no required and no optional terms, no row can possibly satisfy the + // search, so return a constant FALSE immediately. + if len(required) == 0 && len(optional) == 0 && len(excluded) > 0 { + return ftsZeroIntConst(), nil + } + + var allPredicates []Expression + + // For each required term: (col1 ILIKE %term% OR col2 ILIKE %term% ...) 
+ for _, term := range required { + var termColumnPreds []Expression + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err } + termColumnPreds = append(termColumnPreds, pred) } - - // MySQL Boolean mode: a query with only excluded terms ("-a -b") returns - // an empty result set. The LIKE fallback must match this: when there are - // no required and no optional terms, no row can possibly satisfy the - // search, so return a constant FALSE immediately. - if len(required) == 0 && len(optional) == 0 && len(excluded) > 0 { - return zeroIntConst(), nil + if len(termColumnPreds) > 0 { + allPredicates = append(allPredicates, ComposeDNFCondition(ctx, termColumnPreds...)) } + } - var allPredicates []Expression - - // For each required term: (col1 ILIKE %term% OR col2 ILIKE %term% ...) - for _, term := range required { - var termColumnPreds []Expression - for _, column := range columns { - pred, err := buildFTSILikePredicate(ctx, column, term.word) - if err != nil { - return nil, err - } - termColumnPreds = append(termColumnPreds, pred) + // For each excluded term: NOT(col1 ILIKE %term% OR col2 ILIKE %term% ...) + for _, term := range excluded { + var termColumnPreds []Expression + for _, column := range columns { + pred, err := buildFTSILikePredicate(ctx, column, term.word) + if err != nil { + return nil, err } - if len(termColumnPreds) > 0 { - allPredicates = append(allPredicates, ComposeDNFCondition(ctx, termColumnPreds...)) + termColumnPreds = append(termColumnPreds, pred) + } + if len(termColumnPreds) > 0 { + notPred, err := NewFunction(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), + ComposeDNFCondition(ctx, termColumnPreds...)) + if err != nil { + return nil, err } + allPredicates = append(allPredicates, notPred) } + } - // For each excluded term: NOT(col1 ILIKE %term% OR col2 ILIKE %term% ...) 
- for _, term := range excluded { - var termColumnPreds []Expression + // For optional terms: since LIKE cannot rank, treat optionals as a + // positive filter when no required terms exist. + // - required>0: ignore optionals (required terms already anchor the result) + // - required==0, excluded==0: at least one optional must match (pure optional query) + // - required==0, excluded>0: at least one optional must match AND excluded terms + // must be absent; AND the optional-DNF into allPredicates below + if len(optional) > 0 && len(required) == 0 { + var allOptionalPreds []Expression + for _, term := range optional { for _, column := range columns { pred, err := buildFTSILikePredicate(ctx, column, term.word) if err != nil { return nil, err } - termColumnPreds = append(termColumnPreds, pred) - } - if len(termColumnPreds) > 0 { - notPred, err := NewFunction(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), - ComposeDNFCondition(ctx, termColumnPreds...)) - if err != nil { - return nil, err - } - allPredicates = append(allPredicates, notPred) + allOptionalPreds = append(allOptionalPreds, pred) } } - - // For optional terms: since LIKE cannot rank, treat optionals as a - // positive filter when no required terms exist. - // - required>0: ignore optionals (required terms already anchor the result) - // - required==0, excluded==0: at least one optional must match (pure optional query) - // - required==0, excluded>0: at least one optional must match AND excluded terms - // must be absent; AND the optional-DNF into allPredicates below - if len(optional) > 0 && len(required) == 0 { - var allOptionalPreds []Expression - for _, term := range optional { - for _, column := range columns { - pred, err := buildFTSILikePredicate(ctx, column, term.word) - if err != nil { - return nil, err - } - allOptionalPreds = append(allOptionalPreds, pred) - } - } - if len(allOptionalPreds) > 0 { - optionalDNF := ComposeDNFCondition(ctx, allOptionalPreds...) 
- if len(excluded) == 0 { - return optionalDNF, nil - } - allPredicates = append(allPredicates, optionalDNF) + if len(allOptionalPreds) > 0 { + optionalDNF := ComposeDNFCondition(ctx, allOptionalPreds...) + if len(excluded) == 0 { + return optionalDNF, nil } + allPredicates = append(allPredicates, optionalDNF) } + } - if len(allPredicates) == 0 { - return zeroIntConst(), nil - } - - return ComposeCNFCondition(ctx, allPredicates...), nil + if len(allPredicates) == 0 { + return ftsZeroIntConst(), nil } - // Natural Language Mode: split into words and OR them together. + return ComposeCNFCondition(ctx, allPredicates...), nil +} + +// buildFTSNaturalLanguageModeILikeExpression handles the default +// natural-language mode by splitting the search string into whitespace +// tokens and OR-ing per-column per-word ILIKE predicates together. +func buildFTSNaturalLanguageModeILikeExpression(ctx BuildContext, columns []Expression, searchText string) (Expression, error) { words := strings.Fields(searchText) if len(words) == 0 { - return zeroIntConst(), nil + return ftsZeroIntConst(), nil } var columnPredicates []Expression for _, column := range columns { var wordPredicates []Expression for _, word := range words { - word = stripFTSTokenPunctuation(word) - if word == "" { - continue - } pred, err := buildFTSILikePredicate(ctx, column, word) if err != nil { return nil, err @@ -381,7 +334,7 @@ func BuildFTSToILikeExpression( } if len(columnPredicates) == 0 { - return zeroIntConst(), nil + return ftsZeroIntConst(), nil } return ComposeDNFCondition(ctx, columnPredicates...), nil @@ -394,6 +347,14 @@ func BuildFTSToILikeExpression( // stats engine; substituting an equivalent ILIKE expression lets the engine // reuse its TopN/histogram-based estimation paths instead of falling back // to a flat default that ignores column statistics. 
+// +// Restricted to single-column MATCH: GetSelectivityByFilter only estimates +// expressions over a single column, so a multi-column substituted ILIKE would +// be declined by the stats engine and fall through to the same str-match +// default that the un-substituted FTS expression already receives. Returning +// an error for the multi-column case lets the selectivity caller's existing +// err-check fall through cleanly, without producing a substitute that would +// never improve the estimate. func BuildFTSToILikeExpressionFromBuiltin(ctx BuildContext, fts *ScalarFunction) (Expression, error) { if fts == nil || fts.FuncName.L != ast.FTSMysqlMatchAgainst { return nil, errors.Errorf("expected %s, got %v", ast.FTSMysqlMatchAgainst, fts) @@ -402,6 +363,9 @@ func BuildFTSToILikeExpressionFromBuiltin(ctx BuildContext, fts *ScalarFunction) if len(args) < 2 { return nil, errors.Errorf("%s expects at least 2 args, got %d", ast.FTSMysqlMatchAgainst, len(args)) } + if len(args) > 2 { + return nil, ErrNotSupportedYet.GenWithStackByArgs("multi-column MATCH...AGAINST in selectivity substitution") + } againstConst, ok := args[0].(*Constant) if !ok { return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") diff --git a/pkg/expression/fts_to_like_test.go b/pkg/expression/fts_to_like_test.go index 22ed2289d482c..eb11f6195c82d 100644 --- a/pkg/expression/fts_to_like_test.go +++ b/pkg/expression/fts_to_like_test.go @@ -17,9 +17,79 @@ package expression import ( "testing" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/mock" "github.com/stretchr/testify/require" ) +func TestValidateFTSSearchStringForLikeFallback(t *testing.T) { + naturalMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode) + booleanMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode) + + tests := []struct { + 
name string + text string + modifier ast.FulltextSearchModifier + wantErr bool + }{ + // Natural-language mode: plain alphanumeric words only. + {name: "natural empty", text: "", modifier: naturalMode, wantErr: false}, + {name: "natural whitespace only", text: " \t\n ", modifier: naturalMode, wantErr: false}, + {name: "natural single word", text: "MySQL", modifier: naturalMode, wantErr: false}, + {name: "natural multi word", text: "MySQL tutorial PostgreSQL", modifier: naturalMode, wantErr: false}, + {name: "natural alphanumeric mix", text: "abc123 mysql8", modifier: naturalMode, wantErr: false}, + {name: "natural rejects mid-word dash", text: "x-x", modifier: naturalMode, wantErr: true}, + {name: "natural rejects punctuation suffix", text: "MySQL,", modifier: naturalMode, wantErr: true}, + {name: "natural rejects + operator", text: "+word", modifier: naturalMode, wantErr: true}, + {name: "natural rejects - operator", text: "-word", modifier: naturalMode, wantErr: true}, + {name: "natural rejects quote", text: `"phrase"`, modifier: naturalMode, wantErr: true}, + {name: "natural rejects wildcard", text: "word*", modifier: naturalMode, wantErr: true}, + {name: "natural rejects percent", text: "100%", modifier: naturalMode, wantErr: true}, + {name: "natural rejects underscore", text: "test_file", modifier: naturalMode, wantErr: true}, + + // Boolean mode: plain word, +word, -word with alphanumeric body only. 
+ {name: "boolean empty", text: "", modifier: booleanMode, wantErr: false}, + {name: "boolean plain word", text: "MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean required word", text: "+MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean excluded word", text: "-MySQL", modifier: booleanMode, wantErr: false}, + {name: "boolean mix", text: "+apple -cherry pie", modifier: booleanMode, wantErr: false}, + {name: "boolean rejects mid-word dash", text: "xx-yy", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects bare operator", text: "+", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects bare minus", text: "-", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects + after body", text: "x+y", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects wildcard", text: "word*", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects required wildcard", text: "+word*", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects relevance gt", text: ">word", modifier: booleanMode, wantErr: true}, + {name: "boolean rejects relevance lt", text: "<word", modifier: booleanMode, wantErr: true}, + + // Non-ASCII (multi-byte UTF-8) bytes are accepted as word characters (the isFTSWordByte c > 127 case). + {name: "natural utf8 word", text: "你好", modifier: naturalMode, wantErr: false}, + {name: "boolean utf8 word", text: "+你好", modifier: booleanMode, wantErr: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateFTSSearchStringForLikeFallback(tt.text, tt.modifier) + if tt.wantErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} + +// TestParseFTSBooleanSearchString covers the strict-subset inputs the boolean +// parser is expected to handle in production. Inputs outside the subset +// (phrases, wildcards, relevance modifiers, mid-word punctuation, etc.) are +// rejected upstream by ValidateFTSSearchStringForLikeFallback and therefore +// never reach this parser. 
func TestParseFTSBooleanSearchString(t *testing.T) { tests := []struct { input string @@ -39,27 +109,6 @@ func TestParseFTSBooleanSearchString(t *testing.T) { {word: "cherry", isExcluded: true}, }, }, - { - input: "apple*", - expected: []ftsSearchTerm{ - {word: "apple"}, - }, - }, - { - input: `"exact phrase"`, - expected: []ftsSearchTerm{ - {word: "exact phrase"}, - }, - }, - { - input: `+database +mysql -oracle "full text"`, - expected: []ftsSearchTerm{ - {word: "database", isRequired: true}, - {word: "mysql", isRequired: true}, - {word: "oracle", isExcluded: true}, - {word: "full text"}, - }, - }, { input: "word1 word2 word3", expected: []ftsSearchTerm{ @@ -68,19 +117,6 @@ func TestParseFTSBooleanSearchString(t *testing.T) { {word: "word3"}, }, }, - { - input: "+word1* -word2", - expected: []ftsSearchTerm{ - {word: "word1", isRequired: true}, - {word: "word2", isExcluded: true}, - }, - }, - { - input: `"unclosed quote`, - expected: []ftsSearchTerm{ - {word: "unclosed quote"}, - }, - }, { input: "word1\t\nword2", expected: []ftsSearchTerm{ @@ -89,61 +125,12 @@ func TestParseFTSBooleanSearchString(t *testing.T) { }, }, { - input: `+"required phrase"`, - expected: []ftsSearchTerm{ - {word: "required phrase", isRequired: true}, - }, - }, - { - input: `-"excluded phrase"`, - expected: []ftsSearchTerm{ - {word: "excluded phrase", isExcluded: true}, - }, - }, - { - input: `+"required phrase" optional -"excluded phrase"`, - expected: []ftsSearchTerm{ - {word: "required phrase", isRequired: true}, - {word: "optional"}, - {word: "excluded phrase", isExcluded: true}, - }, - }, - { - input: `+word1 +"required phrase" -word2 -"excluded phrase"`, - expected: []ftsSearchTerm{ - {word: "word1", isRequired: true}, - {word: "required phrase", isRequired: true}, - {word: "word2", isExcluded: true}, - {word: "excluded phrase", isExcluded: true}, - }, - }, - { - input: `abc"phrase"`, - expected: []ftsSearchTerm{ - {word: "abc"}, - {word: "phrase"}, - }, - }, - { - input: `word1 
abc"phrase" word2`, - expected: []ftsSearchTerm{ - {word: "word1"}, - {word: "abc"}, - {word: "phrase"}, - {word: "word2"}, - }, - }, - { - input: `+"unclosed`, - expected: []ftsSearchTerm{ - {word: "unclosed", isRequired: true}, - }, + input: "", + expected: nil, }, { - input: `-"unclosed phrase`, - expected: []ftsSearchTerm{ - {word: "unclosed phrase", isExcluded: true}, - }, + input: " \t\n ", + expected: nil, }, } @@ -165,56 +152,14 @@ func TestParseFTSSearchTerm(t *testing.T) { input string expected ftsSearchTerm }{ - { - input: "+word", - expected: ftsSearchTerm{word: "word", isRequired: true}, - }, - { - input: "-word", - expected: ftsSearchTerm{word: "word", isExcluded: true}, - }, - { - input: "word*", - expected: ftsSearchTerm{word: "word"}, - }, - { - input: "+word*", - expected: ftsSearchTerm{word: "word", isRequired: true}, - }, - { - input: "word", - expected: ftsSearchTerm{word: "word"}, - }, - { - input: "", - expected: ftsSearchTerm{word: ""}, - }, - { - input: "+*", - expected: ftsSearchTerm{word: "", isRequired: true}, - }, - // MySQL relevance modifiers >, <, ~ are stripped; word is treated as optional - { - input: ">word", - expected: ftsSearchTerm{word: "word"}, - }, - { - input: " 0.5`, `IS NULL`, CASE, arithmetic, +// XOR, scalar function, etc.) means MATCH is being used as a scalar relevance +// score, where the LIKE rewrite's 0/1 output would diverge from the native +// float score and silently produce wrong rows. In those positions the +// rewriter must fall through to the native FTSMysqlMatchAgainst builtin, +// which preserves the relevance-score semantics (and errors at execution if +// no FTS index is available — the same behavior the user would see with +// alternative logical plans disabled). 
+func (er *expressionRewriter) inDirectMatchBooleanContext() bool { + if er.planCtx == nil { + return false + } + switch er.planCtx.builder.curClause { + case whereClause, havingClause, onClause: + default: + return false + } + if len(er.astNodeStack) == 0 { + return false + } + for i := len(er.astNodeStack) - 2; i >= 0; i-- { + switch n := er.astNodeStack[i].(type) { + case *ast.ParenthesesExpr: + case *ast.BinaryOperationExpr: + if n.Op != opcode.LogicAnd && n.Op != opcode.LogicOr { + return false + } + case *ast.UnaryOperationExpr: + if n.Op != opcode.Not && n.Op != opcode.Not2 { + return false + } + default: + return false + } + } + return true +} + +// matchHasLikeFallbackRescue reports whether matchAgainstToBuiltin is being +// invoked in a position where the alt-rounds driver will discard the produced +// plan and rebuild via the fts-like-fallback round. It is used by the modifier +// guard in matchAgainstToBuiltin to allow native emission of a non-default +// modifier when round 1's plan is destined for discard anyway. The rescue +// conditions mirror the ones in matchAgainstToExpression that trigger +// MarkNonViableFTSMatch — alternative logical plans enabled AND a direct +// boolean predicate context. 
+func (er *expressionRewriter) matchHasLikeFallbackRescue() bool { + if er.planCtx == nil || er.planCtx.builder == nil || er.planCtx.builder.ctx == nil { + return false + } + if !er.planCtx.builder.ctx.GetSessionVars().EnableAlternativeLogicalPlans { + return false + } + return er.inDirectMatchBooleanContext() +} + func (er *expressionRewriter) buildSemiApplyFromEqualSubq(np base.LogicalPlan, planCtx *exprRewriterPlanCtx, l, r expression.Expression, not, markNoDecorrelate bool) { intest.AssertNotNil(planCtx) if er.asScalar || not { @@ -2375,45 +2436,43 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { return } - // When alternative logical plans are enabled, AlternativeLogicalPlanFTSLikeFallback - // is set before the first build round and the expression rewriter converts - // MATCH...AGAINST to ILIKE predicates — but ONLY in filter/predicate clauses - // (WHERE, HAVING, JOIN ON). In scoring contexts (SELECT field list, ORDER BY) - // the result must be a float relevance score; the 0/1 ILIKE result would be - // semantically wrong and silently corrupt ORDER BY MATCH(...) DESC results. - // Those contexts always use the native FTSMysqlMatchAgainst builtin. + // Default behavior (Alt-disabled or Alt-enabled round 1) is to emit the + // native FTSMysqlMatchAgainst builtin. The alternative-rounds driver flips + // AlternativeLogicalPlanFTSLikeFallback to true and re-runs the build + // only when round 1 reported a direct-boolean-context MATCH that the + // native builtin cannot serve (no FTS index on a TiFlash replica / + // modifier not pushdown-supported). In that second pass the rewriter + // emits ILIKE for direct-boolean-context MATCH only — scoring contexts + // (SELECT field list / ORDER BY) and scalar predicate positions + // (IS NULL, comparisons, CASE, arithmetic) need the float relevance + // score, so they keep using the native builtin and will error at + // execution if no FTS index exists there. 
// - // When this is the first (ILIKE) round and the matched columns' table has - // TiFlash replicas, the HasFTSWithTiFlash signal is set so the "fts-native" - // alternative round is triggered. That round rebuilds the plan with the native - // builtin everywhere so TiFlash FTS can compete on cost. + // "Direct boolean context" requires that every ancestor up to the + // WHERE/HAVING/ON root is AND/OR/NOT/parens — see inDirectMatchBooleanContext. + // Limiting the LIKE rewrite to that subset preserves the 0/1-vs-float + // distinction: in scalar positions, `MATCH(...) IS NULL`, `MATCH(...) > 0.5`, + // etc. would silently produce wrong rows if the LIKE rewrite's integer + // result were substituted for the native float score. + // + // Round 1 also has to record viability before committing to native: if + // any boolean-context MATCH is non-viable, the resulting plan would + // fail at execution. The rewriter records that on the planBuilder so the + // round driver can invalidate the plan and trigger the fallback round. useLikeFallback := false if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { sessVars := er.planCtx.builder.ctx.GetSessionVars() - if sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback { - // Only rewrite to ILIKE in predicate (filter) clauses. - // SELECT field list and ORDER BY expect a float relevance score; - // the 0/1 ILIKE result must not substitute it. - switch er.planCtx.builder.curClause { - case whereClause, havingClause, onClause: + if er.inDirectMatchBooleanContext() { + if sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback { + // fts-like-fallback round: boolean-context MATCH rewrites to ILIKE. useLikeFallback = true - } - - // Signal the "fts-native" alternative round only when this MATCH is - // in a predicate context that actually rewrites to ILIKE — that is - // the only situation where the second round can produce a plan that - // differs from the first. 
In scoring contexts (SELECT field list / - // ORDER BY) the rewriter uses the native builtin in both rounds, so - // triggering the extra round adds planning overhead without ever - // changing the chosen plan. - // - // When fired, the viability check additionally requires that every - // matched column's table has an available TiFlash replica AND the - // column is covered by a public FULLTEXT index; otherwise the native - // path would degenerate into a full TiFlash scan, so we leave the - // flag clear and the LIKE fallback wins by default. - if useLikeFallback && !sessVars.StmtCtx.AlternativeLogicalPlanHasFTSWithTiFlash { - er.checkFTSNativeViability(v.Modifier, numCols, stackLen, sessVars) + } else if sessVars.EnableAlternativeLogicalPlans && !er.ftsNativeViable(v.Modifier, numCols, stackLen) { + // Round 1 (native) but this boolean-context MATCH cannot run + // natively. Mark the build so the driver invalidates this plan and + // triggers fts-like-fallback. The rewrite continues with the native + // builtin to keep round 1 internally consistent; that plan is + // discarded after build completes. + er.planCtx.builder.MarkNonViableFTSMatch() } } } @@ -2425,10 +2484,9 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { } } -// checkFTSNativeViability sets AlternativeLogicalPlanHasFTSWithTiFlash only -// when the native FTSMysqlMatchAgainst builtin can plausibly be served on -// TiFlash for every column referenced in MATCH(...). It walks the resolved -// column FieldNames sitting on ctxNameStk (stack layout is +// ftsNativeViable reports whether the MATCH(...) currently being rewritten +// can be served on TiFlash by the native FTSMysqlMatchAgainst builtin. It +// walks the resolved column FieldNames sitting on ctxNameStk (stack layout is // [..., col1, ..., colN, against]) and requires for each column: // - the originating table has an available TiFlash replica; // - the column is covered by a public FULLTEXT index on that table. 
@@ -2438,31 +2496,28 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { // (only ScalarFuncSig_FTSMatchExpression is emitted regardless of modifier), // so a native plan that wins on cost would execute on TiFlash with the modifier // silently dropped. Until the modifier is carried in the pushdown protocol, we -// leave the flag clear for those modifiers and let the LIKE fallback win. -// -// Any unresolved column or any column failing any of the above checks leaves -// the flag clear, so the "fts-native" alternative round is skipped and the -// LIKE fallback wins by default. -func (er *expressionRewriter) checkFTSNativeViability(modifier ast.FulltextSearchModifier, numCols, stackLen int, sessVars *variable.SessionVars) { +// treat those modifiers as non-viable for native pushdown. +func (er *expressionRewriter) ftsNativeViable(modifier ast.FulltextSearchModifier, numCols, stackLen int) bool { if numCols <= 0 { - return + return false } if !ftsModifierAllowsNativePushdown(modifier) { - return + return false } builder := er.planCtx.builder + sessVars := builder.ctx.GetSessionVars() nameStart := stackLen - numCols - 1 for i := range numCols { name := er.ctxNameStk[nameStart+i] if name == nil { - return + return false } tblName := name.OrigTblName if tblName.L == "" { tblName = name.TblName } if tblName.L == "" { - return + return false } dbName := name.DBName if dbName.L == "" { @@ -2470,20 +2525,20 @@ func (er *expressionRewriter) checkFTSNativeViability(modifier ast.FulltextSearc } tblInfo, err := builder.is.TableInfoByName(dbName, tblName) if err != nil { - return + return false } if tblInfo.TiFlashReplica == nil || !tblInfo.TiFlashReplica.Available || tblInfo.TiFlashReplica.Count == 0 { - return + return false } colName := name.OrigColName if colName.L == "" { colName = name.ColName } if !tableHasPublicFTSIndexOnColumn(tblInfo, colName.L) { - return + return false } } - sessVars.StmtCtx.AlternativeLogicalPlanHasFTSWithTiFlash = true + 
return true } // ftsModifierAllowsNativePushdown reports whether an FTS modifier can be @@ -2516,6 +2571,21 @@ func tableHasPublicFTSIndexOnColumn(tblInfo *model.TableInfo, columnNameL string // builtin scalar function which can be pushed down to TiFlash for execution // against a fulltext index. func (er *expressionRewriter) matchAgainstToBuiltin(v *ast.MatchAgainst, numCols, stackLen int) { + // Reject non-default modifiers when native is the final plan. The tipb + // pushdown protocol (see expression/distsql_builtin.go for the explicit + // note) does not serialize the FTS modifier, so TiFlash would silently + // execute Boolean-mode / query-expansion searches as natural-language + // mode. Until the modifier rides through pushdown, refuse to emit + // native here unless the alt-rounds driver is expected to discard this + // emission and rebuild via the fts-like-fallback round (which handles + // Boolean mode correctly via ILIKE; query expansion still errors there + // with a specific message). + if !ftsModifierAllowsNativePushdown(v.Modifier) && !er.matchHasLikeFallbackRescue() { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs( + "MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)") + return + } + against := er.ctxStack[stackLen-1] cols := er.ctxStack[stackLen-numCols-1 : stackLen-1] @@ -2552,17 +2622,6 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s return } - // The search string is baked into LIKE pattern constants at plan-build time. - // A cached plan would reuse the first execution's patterns for all subsequent - // executions, producing wrong results when the AGAINST argument is mutable - // across executions (a `?` parameter marker or a deferred expression such as - // a user variable). For a true literal the baked pattern is stable, so the - // plan is safe to cache; only mark it non-cacheable when the constant could - // vary at execution time. 
- if expression.MaybeOverOptimized4PlanCache(er.sctx, constExpr) { - er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes a mutable search string into plan constants") - } - searchText, err := constExpr.Eval(er.sctx.GetEvalCtx(), chunk.Row{}) if err != nil { er.err = err @@ -2584,6 +2643,11 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s return } + // Reject non-string matched columns before any other LIKE-specific checks + // so the column-type error always wins. If we ran the strict-subset + // validator first, a query like MATCH(int_col) AGAINST('a-b') would + // surface "search term 'a-b' is not supported" — accurate but less + // actionable than "non-string column". columns := make([]expression.Expression, numCols) for i := range numCols { col := er.ctxStack[stackLen-numCols-1+i] @@ -2594,6 +2658,33 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s columns[i] = col } + // The LIKE fallback only translates a strict subset of MySQL FTS search + // strings (alphanumeric words, optionally prefixed with + or - in boolean + // mode). Anything outside that subset would tokenize differently in MySQL + // FTS than a substring LIKE match, so refuse it here. If the same MATCH + // is independently native-viable (FTS index + supported modifier), + // delegate to the native builtin so TiFlash serves it correctly; otherwise + // surface the error to the user. + if err := expression.ValidateFTSSearchStringForLikeFallback(searchText.GetString(), v.Modifier); err != nil { + if er.ftsNativeViable(v.Modifier, numCols, stackLen) { + er.matchAgainstToBuiltin(v, numCols, stackLen) + return + } + er.err = err + return + } + + // The search string is baked into LIKE pattern constants at plan-build time. 
+ // A cached plan would reuse the first execution's patterns for all subsequent + // executions, producing wrong results when the AGAINST argument is mutable + // across executions (a `?` parameter marker or a deferred expression such as + // a user variable). For a true literal the baked pattern is stable, so the + // plan is safe to cache; only mark it non-cacheable when the constant could + // vary at execution time. + if expression.MaybeOverOptimized4PlanCache(er.sctx, constExpr) { + er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes a mutable search string into plan constants") + } + er.ctxStackPop(numCols + 1) result, err := er.convertMatchAgainstToLike(columns, searchText.GetString(), v.Modifier) diff --git a/pkg/planner/core/fulltext_to_like.go b/pkg/planner/core/fulltext_to_like.go index 190624817cbe7..72f0cb04f519e 100644 --- a/pkg/planner/core/fulltext_to_like.go +++ b/pkg/planner/core/fulltext_to_like.go @@ -27,34 +27,46 @@ import ( // // This is a fallback rewrite since TiDB does not natively support full-text // search outside the TiFlash FTS path. The planner only invokes it in -// predicate clauses (WHERE / HAVING / JOIN ON) — scoring contexts -// (SELECT field list, ORDER BY) keep the native FTSMysqlMatchAgainst builtin -// so the result is a float relevance score rather than 0/1, even though the -// native path then requires TiFlash at execution time. The semantic -// differences below therefore apply to predicate use only: +// direct-boolean predicate positions — every ancestor up to the +// WHERE / HAVING / JOIN ON root must be AND / OR / NOT / parens +// (see inDirectMatchBooleanContext in expression_rewriter.go). Scoring +// contexts (SELECT field list, ORDER BY) and scalar predicate positions +// (IS NULL, comparisons, CASE, arithmetic) keep the native +// FTSMysqlMatchAgainst builtin so the result is a float relevance score +// rather than 0/1, even though the native path then requires TiFlash at +// execution time. 
The semantic differences below therefore apply to +// direct-boolean predicate use only: // // 1. No relevance scoring — the synthesized ILIKE predicate produces a 0/1 -// boolean filter result, which is the only thing a WHERE/HAVING/JOIN ON -// clause consumes. Relevance ranking (ORDER BY MATCH(...) DESC) and -// scalar SELECT MATCH(...) are intentionally NOT routed through this -// fallback for that reason; substituting 0/1 there would silently -// corrupt the sort or the projected score. -// 2. No stop word filtering - searches for all words regardless of length or commonness -// 3. No word length limits - MySQL ignores words shorter than ft_min_word_len (default 4) -// 4. No word boundaries - LIKE %term% matches substrings anywhere, not just complete words -// - Simple terms: "cat" matches "concatenate", "category", "application" -// (MySQL FTS only matches "cat" as a standalone word) -// - Prefix wildcard: "Optim*" matches "reOptimizing", "Optimizing" -// (MySQL FTS only matches words starting with "Optim" like "Optimizing", not "reOptimizing") -// - Phrase matching: "quick brown" matches "aquick brownie" -// (MySQL FTS only matches the exact phrase with word boundaries) -// This limitation exists because LIKE cannot enforce word boundaries without REGEXP +// boolean filter result, which is the only thing a direct-boolean +// predicate position consumes. Relevance-score positions (ORDER BY, +// scalar SELECT, MATCH ... = 0, MATCH ... > 0.5, etc.) are intentionally +// NOT routed through this fallback; substituting 0/1 there would +// silently corrupt the sort or the comparison. +// 2. No stop word filtering — searches for all words regardless of length +// or commonness. +// 3. No word length limits — MySQL ignores words shorter than +// ft_min_word_len (default 4); the ILIKE rewrite does not. +// 4. No word boundaries — LIKE %term% matches substrings anywhere, not just +// complete words. 
Example: "cat" matches "concatenate", "category", +// "application"; MySQL FTS only matches "cat" as a standalone word. +// Enforcing word boundaries would require REGEXP, which we avoid. +// 5. Performance — LIKE predicates cannot use full-text indexes (much +// slower on large datasets). // -// 5. Performance - LIKE predicates cannot use full-text indexes (much slower on large datasets) +// Search-string subset accepted by the rewrite (enforced upstream by +// expression.ValidateFTSSearchStringForLikeFallback): // -// Supported Boolean mode operators: + (required), - (excluded), * (prefix wildcard), "..." (phrase) -// Partially supported: ~ (treated as optional, ranking effect ignored), > < (treated as optional) -// Unsupported: WITH QUERY EXPANSION (returns an error), () sub-expression grouping (stripped) +// - Natural-language mode: whitespace-separated alphanumeric words only. +// - Boolean mode: each token is `word`, `+word` (required), or `-word` +// (excluded), where `word` is alphanumeric (ASCII or non-ASCII UTF-8). +// +// Anything outside that subset — phrases, * prefix, > < ~ relevance +// modifiers, () grouping, mid-word punctuation like `xx-yy` — is rejected +// at plan time with ErrNotSupportedYet because MySQL FTS tokenizes those +// constructs in ways a substring LIKE cannot reproduce. WITH QUERY +// EXPANSION is likewise rejected (no LIKE approximation exists for the +// second-pass tokenization). func (er *expressionRewriter) convertMatchAgainstToLike( columns []expression.Expression, searchText string, diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index 8b54d178b282f..a01e43fb90699 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -327,6 +327,29 @@ type PlanBuilder struct { allowBuildCastArray bool // resolveCtx is set when calling Build, it's only effective in the current Build call. 
resolveCtx *resolve.Context + + // nonViableFTSMatch is set during build when the expression rewriter + // encounters a predicate-context MATCH...AGAINST whose native form + // (FTSMysqlMatchAgainst) cannot be executed — the matched columns lack a + // public FULLTEXT index on a TiFlash-backed table, or the modifier is not + // supported by pushdown. The flag is read by the alternative-rounds driver + // after the round to invalidate the round's plan and trigger the + // fts-like-fallback round (see optimize.go). + nonViableFTSMatch bool +} + +// HasNonViableFTSMatch reports whether the most recent build round saw a +// predicate-context MATCH...AGAINST that could not be served by the native +// FTSMysqlMatchAgainst builtin. The caller (optimize.go) uses this to +// invalidate the round's plan and trigger the fts-like-fallback round. +func (b *PlanBuilder) HasNonViableFTSMatch() bool { + return b.nonViableFTSMatch +} + +// MarkNonViableFTSMatch records that a predicate-context MATCH...AGAINST in +// the current build cannot be served natively. See HasNonViableFTSMatch. +func (b *PlanBuilder) MarkNonViableFTSMatch() { + b.nonViableFTSMatch = true } type handleColHelper struct { diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index 14ce59f56d5b1..93ac674631b2d 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -561,6 +561,17 @@ func buildAndOptimizeLogicalPlanRound( return nil, nil, false, err } + // If this round saw a predicate-context MATCH that cannot be served by the + // native FTSMysqlMatchAgainst builtin, the produced plan would fail at + // execution. Discard it and arm AlternativeLogicalPlanFTSLikeFallback so + // the fts-like-fallback alternative round rebuilds the plan with ILIKE. + // The flag also persists across any subsequent rounds (correlate, etc.) + // so their re-rewrites use ILIKE for predicate MATCHes too. 
+ if builder.HasNonViableFTSMatch() { + sctx.GetSessionVars().StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + return p, names, false, nil + } + if *bestPlan == nil || cost < *bestCost { *bestCost = cost *bestPlan = finalPlan @@ -632,20 +643,19 @@ var alternativeRounds = [...]alternativeRound{ }, }, { - // fts-native: rebuild the plan using the native FTSMysqlMatchAgainst - // builtin so TiFlash FTS can compete on cost against the first round's - // ILIKE-based plan. Only enabled when the first round detected that the - // matched columns' table has TiFlash replicas — without TiFlash the native - // builtin can't be pushed down and would error at execution time. - name: "fts-native", + // fts-like-fallback: rebuild the plan rewriting predicate-context + // MATCH...AGAINST to ILIKE so the query can execute when the native + // FTSMysqlMatchAgainst builtin can't be served. Round 1 always uses + // the native builtin (same as Alt-disabled); the round driver sets + // AlternativeLogicalPlanFTSLikeFallback and invalidates round 1's plan + // only when round 1 saw a predicate-context MATCH whose columns lack + // a public FULLTEXT index on a TiFlash replica (or whose modifier is + // not pushdown-supported). When this round fires it is the only valid + // plan; round 1's plan was discarded. 
+ name: "fts-like-fallback", enabled: func(sv *variable.SessionVars) bool { - return sv.EnableAlternativeLogicalPlans && sv.StmtCtx.AlternativeLogicalPlanHasFTSWithTiFlash - }, - setup: func(sv *variable.SessionVars) { - sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = false - }, - cleanup: func(sv *variable.SessionVars) { - sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + return sv.EnableAlternativeLogicalPlans && + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback }, }, } @@ -693,24 +703,12 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW if needRestoreLogicalPlanCtx { initialLogicalPlanCtx = saveLogicalPlanBuildCtx(sessVars) sessVars.StmtCtx.ResetAlternativeLogicalPlanSignals() - // Enable LIKE fallback for MATCH...AGAINST whenever alternative logical - // plans are active. The flag is set here so the first round rewrites - // predicate-context MATCH (WHERE / HAVING / JOIN ON) to ILIKE while - // scoring contexts (SELECT field / ORDER BY) still use the native - // builtin (only it produces a float relevance score). - // - // The flag also persists across most subsequent alternative rounds, so - // they likewise produce executable LIKE-based plans. The "fts-native" - // alternative round defined above is the exception: when triggered - // (TiFlash replica + public FULLTEXT index detected on every matched - // column during round 1), it clears this flag during setup so the - // round uses the native FTSMysqlMatchAgainst builtin everywhere and - // can compete on cost; cleanup restores the flag for following rounds. - // - // When alternative logical plans are disabled, the flag stays unset - // and every MATCH uses the native builtin path (which requires TiFlash - // at execution time). - sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + // Round 1 always uses the native FTSMysqlMatchAgainst builtin — same as + // the Alt-disabled default. 
If the build records a non-viable predicate + // MATCH on the planBuilder (no FTS index / no TiFlash replica), the + // round driver discards round 1's plan and sets + // AlternativeLogicalPlanFTSLikeFallback to trigger the fts-like-fallback + // alternative round, which re-builds using ILIKE for predicate MATCHes. } p, names, nonLogical, err := buildAndOptimizeLogicalPlanRound( @@ -749,6 +747,7 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW enabledRounds = append(enabledRounds, round) } } + var lastAltRoundErr error for _, round := range enabledRounds { restoreLogicalPlanBuildCtx(sessVars, initialLogicalPlanCtx) failpoint.Inject("failIfAlternativeLogicalPlanRoundTriggered", func(val failpoint.Value) { @@ -784,10 +783,15 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW }() if err != nil { // Alternative rounds are optional optimizations. If one fails, - // log and continue — the first round's plan is still valid. + // log and continue — the first round's plan is still valid in + // the general case. fts-like-fallback is the exception: the + // first round's plan may have been discarded as non-executable, + // so we remember the last alt-round error in case bestPlan + // remains nil after the loop. logutil.BgLogger().Warn("alternative logical plan round failed", zap.String("round", round.name), zap.Error(err)) + lastAltRoundErr = err continue } if nonLogical { @@ -795,6 +799,13 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW } } if bestPlan == nil { + if lastAltRoundErr != nil { + // No valid plan from any round. Surface the most recent alt-round + // error rather than the generic sentinel — typically this is the + // fts-like-fallback round reporting why MATCH...AGAINST cannot be + // rewritten (unsupported search string, etc.). 
+ return nil, nil, 0, lastAltRoundErr + } return nil, nil, 0, errors.New("failed to build logical plan") } if needRestoreLogicalPlanCtx { diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index 22eec39f4c939..972da8658b513 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -486,17 +486,20 @@ type StatementContext struct { // build round encountered a non-correlated IN subquery eligible for the // correlate-to-Apply alternative. AlternativeLogicalPlanPreferCorrelate bool - // AlternativeLogicalPlanFTSLikeFallback is a mode flag set before the - // first build round when alternative logical plans are enabled. When true, - // the expression rewriter converts MATCH...AGAINST to LIKE predicates - // (predicate contexts only) instead of the native FTSMysqlMatchAgainst builtin. + // AlternativeLogicalPlanFTSLikeFallback is a mode flag controlling how the + // expression rewriter handles MATCH...AGAINST in predicate contexts. When + // false (the default, matching Alt-disabled behavior) the rewriter emits + // the native FTSMysqlMatchAgainst builtin. When true, the rewriter emits + // ILIKE-based predicates instead. + // + // Round 1 always runs with this flag false. If the build phase finds any + // predicate-context MATCH that cannot be served natively (no FTS index on a + // matched column / no TiFlash replica / modifier not pushdown-supported), + // optimize.go invalidates the round-1 plan and sets this flag so the + // "fts-like-fallback" alternative round fires with the rewriter switched + // to ILIKE. The flag survives subsequent rounds so any further re-rewrite + // (correlate, etc.) keeps using ILIKE for the affected MATCHes. AlternativeLogicalPlanFTSLikeFallback bool - // AlternativeLogicalPlanHasFTSWithTiFlash is set during the first (ILIKE) - // build round when a MATCH...AGAINST expression is encountered AND the - // matched columns' table has TiFlash replicas. 
This triggers the "fts-native" - // alternative round so the native FTS builtin (pushed to TiFlash) can compete - // on cost against the ILIKE plan. - AlternativeLogicalPlanHasFTSWithTiFlash bool // IsExplainAnalyzeDML is true if the statement is "explain analyze DML executors", before responding the explain // results to the client, the transaction should be committed first. See issue #37373 for more details. @@ -677,7 +680,6 @@ func (sc *StatementContext) ResetAlternativeLogicalPlanSignals() { sc.AlternativeLogicalPlanSameOrderIndexJoin = false sc.AlternativeLogicalPlanOrderAwareJoinReorder = false sc.AlternativeLogicalPlanFTSLikeFallback = false - sc.AlternativeLogicalPlanHasFTSWithTiFlash = false sc.AlternativeLogicalPlanPreferCorrelate = false } diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index 659949b5df05e..36a7cf6916e84 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -32,12 +32,9 @@ id title 4 MySQL vs. 
PostgreSQL 5 MySQL Security select id, title from articles where match(title) against('Optim*' in boolean mode); -id title -3 Optimizing MySQL +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'Optim*' is not supported in the LIKE fallback' select id, title from articles where match(title, body) against('"MySQL tutorial"' in boolean mode); -id title -1 MySQL Tutorial -2 How To Use MySQL Well +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"MySQL' is not supported in the LIKE fallback' select id, title from articles where match(title, body) against('+MySQL +database -PostgreSQL' in boolean mode); id title 5 MySQL Security @@ -70,44 +67,30 @@ insert into special_chars values (4, 'Path is C:\\Windows\\System32'), (5, 'Normal text without special chars'); select id, content from special_chars where match(content) against('100%'); -id content -1 Progress is at 100% +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '100%' is not supported in the LIKE fallback' select id, content from special_chars where match(content) against('test_file'); -id content -3 File name is test_file.txt +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'test_file' is not supported in the LIKE fallback' select id, content from special_chars where match(content) against('C:\\Windows'); -id content -4 Path is C:\Windows\System32 +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'C:\Windows' is not supported in the LIKE fallback' select id, content from special_chars where match(content) against('+100% +Progress' in boolean mode); -id content -1 Progress is at 100% +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+100%' is not supported in the LIKE fallback' drop table if exists special_chars; select id, title from articles where match(title) 
against('-PostgreSQL -Security' in boolean mode); id title select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); -id title -1 MySQL Tutorial +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"MySQL' is not supported in the LIKE fallback' select id, title from articles where match(title) against('+MySQL +tutorial -Security' in boolean mode); id title 1 MySQL Tutorial select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); -id title -1 MySQL Tutorial -2 How To Use MySQL Well -3 Optimizing MySQL -4 MySQL vs. PostgreSQL -5 MySQL Security +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+*' is not supported in the LIKE fallback' select id, title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); id title 1 MySQL Tutorial 3 Optimizing MySQL select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); -id title -1 MySQL Tutorial -2 How To Use MySQL Well -3 Optimizing MySQL -4 MySQL vs. PostgreSQL +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '"How' is not supported in the LIKE fallback' select id, title from articles where match(title) against(' '); id title @@ -119,13 +102,11 @@ id title 4 MySQL vs. 
PostgreSQL 5 MySQL Security select id, title from articles where match(title) against('+"MySQL Tutorial"' in boolean mode); -id title -1 MySQL Tutorial +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+"MySQL' is not supported in the LIKE fallback' select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); -id title +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '-"MySQL' is not supported in the LIKE fallback' select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); -id title -2 How To Use MySQL Well +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '+"How' is not supported in the LIKE fallback' select id, title from articles where match(title) against('tutorial -Security' in boolean mode); id title 1 MySQL Tutorial @@ -134,25 +115,13 @@ id title 1 MySQL Tutorial 4 MySQL vs. PostgreSQL select id, title from articles where match(title) against('MySQL, PostgreSQL.'); -id title -1 MySQL Tutorial -2 How To Use MySQL Well -3 Optimizing MySQL -4 MySQL vs. PostgreSQL -5 MySQL Security +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term 'MySQL,' is not supported in the LIKE fallback' select id, title from articles where match(title) against('>MySQL MySQL' is not supported in the LIKE fallback' select id, title from articles where match(title) against(NULL); id title select id, title from articles where match(title) against('~Security ~PostgreSQL' in boolean mode); -id title -4 MySQL vs. 
PostgreSQL -5 MySQL Security +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '~Security' is not supported in the LIKE fallback' select id, title from articles where match(title) against('MySQL' with query expansion); Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST WITH QUERY EXPANSION is not supported in the LIKE fallback' select id, match(title) against('MySQL') as score from articles; @@ -167,6 +136,29 @@ id title 5 MySQL Security select id from articles where match(id) against('MySQL'); Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id from articles where match(id) against('xx-yy'); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id, title from articles +where match(title) against('MySQL') and match(body) against('PostgreSQL'); +id title +4 MySQL vs. PostgreSQL +select id, title from articles where not match(title) against('MySQL'); +id title +select id, title from articles where (match(title) against('MySQL')); +id title +1 MySQL Tutorial +2 How To Use MySQL Well +3 Optimizing MySQL +4 MySQL vs. PostgreSQL +5 MySQL Security +select id, title from articles where (match(title) against('MySQL')) is null; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (match(title) against('MySQL')) > 0.5; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (match(title) against('MySQL')) = 0; +Error 1105 (HY000): cannot use 'MATCH ... AGAINST' outside of fulltext index +select id, title from articles where (case when match(title) against('MySQL') then 1 else 0 end) = 1; +Error 1105 (HY000): cannot use 'MATCH ... 
AGAINST' outside of fulltext index set @@tidb_enable_prepared_plan_cache=1; prepare st_fts_lit from 'select id, title from articles where match(title) against(''MySQL'')'; execute st_fts_lit; @@ -214,5 +206,15 @@ id title 4 MySQL vs. PostgreSQL deallocate prepare st_fts; set @@tidb_enable_prepared_plan_cache=DEFAULT; +select id, match(title) against('+MySQL' in boolean mode) as score from articles; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +select id, title from articles order by match(title) against('+MySQL' in boolean mode) desc; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +select id, title from articles where (match(title) against('+MySQL' in boolean mode)) is null; +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +set @@tidb_opt_enable_alternative_logical_plans=OFF; +select id, title from articles where match(title) against('+MySQL' in boolean mode); +Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' +set @@tidb_opt_enable_alternative_logical_plans=ON; set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index eb0eb896c37c2..2cef8a766d444 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -24,10 +24,17 @@ select id, title from articles where match(title) against('+MySQL +tutorial' in # Test 4: Boolean Mode - 
Excluded Terms select id, title from articles where match(title) against('+MySQL -tutorial' in boolean mode); -# Test 5: Boolean Mode - Prefix Wildcard +# Test 5: Boolean Mode - Prefix Wildcard is rejected by the strict subset (LIKE +# cannot enforce word-start boundaries; MySQL FTS would only match words +# starting with the prefix). Falls back to native FTS path; without an FTS +# index, surfaces the rewrite error. +-- error 1235 select id, title from articles where match(title) against('Optim*' in boolean mode); -# Test 6: Boolean Mode - Exact Phrase +# Test 6: Boolean Mode - Exact Phrase is rejected by the strict subset (LIKE +# cannot enforce word boundaries inside a phrase). Falls back to native FTS +# path; without an FTS index, surfaces the rewrite error. +-- error 1235 select id, title from articles where match(title, body) against('"MySQL tutorial"' in boolean mode); # Test 7: Boolean Mode - Complex Query @@ -51,7 +58,11 @@ select id, title from articles where match(title) against('MySQL'); # Test 12: Natural Language Mode with single word select id, title from articles where match(title) against('PostgreSQL'); -# Test 13: Test escaping of special LIKE characters +# Test 13: Special characters in search strings are rejected by the strict +# subset (MySQL FTS treats %, _, \, : etc. as word separators or operators, +# so a substring LIKE on them would produce results inconsistent with MySQL +# FTS tokenization). Each rejection falls back to the native FTS path; without +# an FTS index, surfaces the rewrite error. 
drop table if exists special_chars; create table special_chars (id int primary key, content varchar(200)); insert into special_chars values @@ -61,16 +72,16 @@ insert into special_chars values (4, 'Path is C:\\Windows\\System32'), (5, 'Normal text without special chars'); -# Test searching for literal % character +-- error 1235 select id, content from special_chars where match(content) against('100%'); -# Test searching for literal _ character +-- error 1235 select id, content from special_chars where match(content) against('test_file'); -# Test searching for literal \ character +-- error 1235 select id, content from special_chars where match(content) against('C:\\Windows'); -# Test Boolean mode with special characters +-- error 1235 select id, content from special_chars where match(content) against('+100% +Progress' in boolean mode); drop table if exists special_chars; @@ -78,20 +89,23 @@ drop table if exists special_chars; # Test 14: Boolean mode - only excluded terms (no required/optional) select id, title from articles where match(title) against('-PostgreSQL -Security' in boolean mode); -# Test 15: Boolean mode - unclosed quote (should treat as phrase) +# Test 15: Boolean mode - quote is rejected by the strict subset. +-- error 1235 select id, title from articles where match(title) against('"MySQL tutorial' in boolean mode); # Test 16: Boolean mode - mixed whitespace (tabs and newlines) select id, title from articles where match(title) against('+MySQL +tutorial -Security' in boolean mode); -# Test 17: Boolean mode - empty word after operator removal (+* should be ignored) +# Test 17: Boolean mode - `*` is rejected by the strict subset. 
+-- error 1235 select id, title from articles where match(title) against('+MySQL +* tutorial' in boolean mode); # Test 18: Boolean mode - multiple excluded terms select id, title from articles where match(title) against('+MySQL -PostgreSQL -Security -Well' in boolean mode); -# Test 19: Boolean mode - all term types combined +# Test 19: Boolean mode - mixed `*` and quoted phrase are rejected. +-- error 1235 select id, title from articles where match(title) against('+MySQL -Security tutorial "How To" Optim*' in boolean mode); # Test 20: Natural language mode - only whitespace @@ -101,13 +115,16 @@ select id, title from articles where match(title) against(' # Test 21: Natural language mode - multiple spaces between words select id, title from articles where match(title) against('MySQL tutorial PostgreSQL'); -# Test 22: Boolean mode - required phrase with + operator +# Test 22: Boolean mode - required phrase rejected by strict subset. +-- error 1235 select id, title from articles where match(title) against('+"MySQL Tutorial"' in boolean mode); -# Test 23: Boolean mode - excluded phrase with - operator +# Test 23: Boolean mode - excluded phrase rejected by strict subset. +-- error 1235 select id, title from articles where match(title) against('-"MySQL Tutorial"' in boolean mode); -# Test 24: Boolean mode - mix of required/excluded phrases and words +# Test 24: Boolean mode - phrase mixed with words rejected by strict subset. 
+-- error 1235 select id, title from articles where match(title) against('+MySQL +"How To" -PostgreSQL' in boolean mode); # Test 25: Boolean mode - optional + excluded (optional treated as required filter) @@ -116,16 +133,21 @@ select id, title from articles where match(title) against('tutorial -Security' i # Test 26: Boolean mode - optional + excluded with multiple optionals select id, title from articles where match(title) against('tutorial PostgreSQL -Security' in boolean mode); -# Test 27: Natural language mode - punctuation attached to tokens is stripped +# Test 27: Natural language mode - punctuation in tokens rejected by strict +# subset. MySQL FTS would tokenize away the punctuation, but a substring LIKE +# would include it, so we refuse the rewrite. +-- error 1235 select id, title from articles where match(title) against('MySQL, PostgreSQL.'); -# Test 28: Boolean mode - MySQL relevance modifiers > < treated as optional +# Test 28: Boolean mode - relevance modifiers > < rejected by strict subset. +-- error 1235 select id, title from articles where match(title) against('>MySQL 0.5; + +# Test 36g: Scalar-position MATCH — explicit comparison to 0. Native returns +# the score (≥0). Coincidentally a LIKE 0/1 result agrees on "no match" +# rows, but we still route to native to preserve relevance-score semantics +# uniformly across scalar positions. +-- error 1105 +select id, title from articles where (match(title) against('MySQL')) = 0; + +# Test 36h: Scalar-position MATCH inside CASE WHEN. The WHEN expression takes +# a boolean condition, but the MATCH is buried under the CASE node, which is +# a non-boolean ancestor. Falls through to native. +-- error 1105 +select id, title from articles where (case when match(title) against('MySQL') then 1 else 0 end) = 1; + # Test 37: Plan cache - prepared statement with literal AGAINST IS cacheable. 
# The LIKE rewrite bakes the search string into pattern constants; for a true # literal those constants are stable across executions, so the plan must be @@ -182,6 +252,34 @@ set @@tidb_enable_prepared_plan_cache=DEFAULT; # rewrite time as a non-constant search string, so they never reach the # plan-cache decision and need no separate cache-skip coverage here. +# Test 39: Non-default modifier in a scoring context (SELECT field). LIKE +# cannot produce a float relevance score so it can't rescue this; the modifier +# guard in matchAgainstToBuiltin must error at plan time rather than emit a +# native FTS expression that TiFlash would silently execute as natural-language +# mode (the tipb pushdown protocol drops the modifier). +-- error 1235 +select id, match(title) against('+MySQL' in boolean mode) as score from articles; + +# Test 40: Non-default modifier in ORDER BY (scoring context). Same guard. +-- error 1235 +select id, title from articles order by match(title) against('+MySQL' in boolean mode) desc; + +# Test 41: Non-default modifier in a scalar predicate position (IS NULL). +# Even with alternative logical plans enabled, the LIKE round only rewrites +# direct-boolean MATCHes; the scalar position falls through to native, which +# must reject the modifier rather than mistranslate it on TiFlash. +-- error 1235 +select id, title from articles where (match(title) against('+MySQL' in boolean mode)) is null; + +# Test 42: Boolean mode in WHERE with alternative logical plans disabled. +# Without the fts-like-fallback rescue, native is the final plan, so the +# modifier guard must fire. (Pre-PR this query would push to TiFlash and +# silently execute as natural-language mode.) 
+set @@tidb_opt_enable_alternative_logical_plans=OFF; +-- error 1235 +select id, title from articles where match(title) against('+MySQL' in boolean mode); +set @@tidb_opt_enable_alternative_logical_plans=ON; + # Cleanup set @@tidb_opt_enable_alternative_logical_plans=OFF; drop table if exists articles; From bac401f06273e50d69da679634b12453f956a757 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 11 May 2026 14:05:35 -0700 Subject: [PATCH 33/42] bazel update --- .claude/scheduled_tasks.lock | 1 + cmd/tidb-server/BUILD.bazel | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 .claude/scheduled_tasks.lock diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock new file mode 100644 index 0000000000000..5bf19fa251067 --- /dev/null +++ b/.claude/scheduled_tasks.lock @@ -0,0 +1 @@ +{"sessionId":"226a88a4-916e-431d-9706-de984bfe11e4","pid":99453,"procStart":"Sun May 3 19:08:19 2026","acquiredAt":1777995047016} \ No newline at end of file diff --git a/cmd/tidb-server/BUILD.bazel b/cmd/tidb-server/BUILD.bazel index 4ff8cdac2f33f..039c810a79eb3 100644 --- a/cmd/tidb-server/BUILD.bazel +++ b/cmd/tidb-server/BUILD.bazel @@ -107,7 +107,7 @@ go_test( srcs = ["main_test.go"], embed = [":tidb-server_lib"], flaky = True, - shard_count = 6, + shard_count = 7, deps = [ "//pkg/config", "//pkg/config/deploymode", From 4aa6f932efaebe7095d4355b4f1b59c42e1eca94 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 11 May 2026 15:49:18 -0700 Subject: [PATCH 34/42] *: stop tracking Claude Code runtime state `.claude/scheduled_tasks.lock` is per-session runtime state written by the local Claude Code harness (sessionId, pid, etc.). It was committed by mistake in the previous bazel update and CI's license-header check is now failing because the file has no Apache header. Remove it from the index and gitignore the path along with `.claude/settings.local.json`, which is similarly per-user state and shouldn't end up in the tree. 
The intentional `.claude/skills/` documentation stays tracked. Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/scheduled_tasks.lock | 1 - .gitignore | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) delete mode 100644 .claude/scheduled_tasks.lock diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock deleted file mode 100644 index 5bf19fa251067..0000000000000 --- a/.claude/scheduled_tasks.lock +++ /dev/null @@ -1 +0,0 @@ -{"sessionId":"226a88a4-916e-431d-9706-de984bfe11e4","pid":99453,"procStart":"Sun May 3 19:08:19 2026","acquiredAt":1777995047016} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1bd76a0654693..846b3c01f7e02 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,7 @@ var # Personal config files /*config.toml .cache + +# Claude Code runtime state (per-user, not part of repo) +.claude/scheduled_tasks.lock +.claude/settings.local.json From 49db4da25141152acb62fdb7ba675cdd8a86606c Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 11 May 2026 15:49:30 -0700 Subject: [PATCH 35/42] planner: fix NULL search handling in MATCH...AGAINST LIKE fallback Two correctness bugs in matchAgainstToLike's NULL fast-path: * The plan-cache skip check ran AFTER the NULL early-return, so a prepared statement bound to NULL first would bake a constant-false plan and the cached plan could be reused for a later non-NULL bind, silently returning zero rows. Move the skip ahead of the NULL handling so a mutable AGAINST argument disables caching in every branch below. * The NULL emission was Constant(0), which under NOT collapses to NOT 0 = 1 and admits every row. Native MATCH on a NULL search returns NULL (builtin_fts.go evalReal), so NOT NULL = NULL filters the row. Emit Constant(NULL) instead so MySQL three-valued logic is preserved under NOT / IS NULL / IS NOT NULL. Tests: NOT MATCH(...) AGAINST(NULL) returns zero rows (Test 29a); parenthesized MATCH(...) 
AGAINST(NULL) returns zero rows (Test 29b); prepared statement bound to NULL then to 'PostgreSQL' returns the PostgreSQL row on the second execute and @@last_plan_from_cache = 0 (Test 38a). Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/planner/core/expression_rewriter.go | 34 ++++++++++++------- .../r/planner/core/fulltext_search.result | 18 ++++++++++ .../t/planner/core/fulltext_search.test | 31 ++++++++++++++++- 3 files changed, 69 insertions(+), 14 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 17937fc9740ea..c899680d227ac 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2622,6 +2622,19 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s return } + // The LIKE fallback bakes the search value into the produced plan — either + // as ILIKE pattern constants (non-NULL case) or as a Constant(NULL) + // short-circuit. A cached plan would reuse the first execution's baked + // value for later executions, producing wrong results whenever the AGAINST + // argument is mutable: a `?` parameter marker, a user variable, or another + // deferred expression. In particular, a NULL first bind would bake a + // Constant(NULL) plan and reuse it for a later non-NULL bind. Mark the + // plan non-cacheable here, before the NULL fast-path and before Eval, so + // the skip applies uniformly across all branches below. + if expression.MaybeOverOptimized4PlanCache(er.sctx, constExpr) { + er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes a mutable search string into plan constants") + } + searchText, err := constExpr.Eval(er.sctx.GetEvalCtx(), chunk.Row{}) if err != nil { er.err = err @@ -2629,10 +2642,16 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s } if searchText.IsNull() { - // NULL search string matches nothing, consistent with native FTS behavior. 
+ // NULL search yields NULL in MySQL FTS semantics + // (builtin_fts.go evalReal returns isNull=true for NULL args), so we + // emit Constant(NULL) rather than Constant(0). This preserves + // three-valued logic under NOT — NOT NULL = NULL filters the row — + // and under IS NULL / IS NOT NULL. A literal Constant(0) would make + // NOT(MATCH...) admit every row when the search is NULL, diverging + // from native semantics. er.ctxStackPop(numCols + 1) er.ctxStackAppend(&expression.Constant{ - Value: types.NewIntDatum(0), + Value: types.Datum{}, RetType: types.NewFieldType(mysql.TypeTiny), }, types.EmptyName) return @@ -2674,17 +2693,6 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s return } - // The search string is baked into LIKE pattern constants at plan-build time. - // A cached plan would reuse the first execution's patterns for all subsequent - // executions, producing wrong results when the AGAINST argument is mutable - // across executions (a `?` parameter marker or a deferred expression such as - // a user variable). For a true literal the baked pattern is stable, so the - // plan is safe to cache; only mark it non-cacheable when the constant could - // vary at execution time. 
- if expression.MaybeOverOptimized4PlanCache(er.sctx, constExpr) { - er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes a mutable search string into plan constants") - } - er.ctxStackPop(numCols + 1) result, err := er.convertMatchAgainstToLike(columns, searchText.GetString(), v.Modifier) diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index 36a7cf6916e84..f90473fdad47a 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -120,6 +120,10 @@ select id, title from articles where match(title) against('>MySQL MySQL' is not supported in the LIKE fallback' select id, title from articles where match(title) against(NULL); id title +select id, title from articles where not match(title) against(NULL); +id title +select id, title from articles where (match(title) against(NULL)); +id title select id, title from articles where match(title) against('~Security ~PostgreSQL' in boolean mode); Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST search term '~Security' is not supported in the LIKE fallback' select id, title from articles where match(title) against('MySQL' with query expansion); @@ -206,6 +210,20 @@ id title 4 MySQL vs. PostgreSQL deallocate prepare st_fts; set @@tidb_enable_prepared_plan_cache=DEFAULT; +set @@tidb_enable_prepared_plan_cache=1; +prepare st_fts_null from 'select id, title from articles where match(title) against(?)'; +set @q = NULL; +execute st_fts_null using @q; +id title +set @q = 'PostgreSQL'; +execute st_fts_null using @q; +id title +4 MySQL vs. 
PostgreSQL +select @@last_plan_from_cache; +@@last_plan_from_cache +0 +deallocate prepare st_fts_null; +set @@tidb_enable_prepared_plan_cache=DEFAULT; select id, match(title) against('+MySQL' in boolean mode) as score from articles; Error 1235 (42000): This version of TiDB doesn't yet support 'MATCH...AGAINST with this modifier on the native FTS path (modifier is not carried through pushdown to TiFlash)' select id, title from articles order by match(title) against('+MySQL' in boolean mode) desc; diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index 2cef8a766d444..a6a1c160f8109 100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -143,9 +143,19 @@ select id, title from articles where match(title) against('MySQL, PostgreSQL.'); -- error 1235 select id, title from articles where match(title) against('>MySQL Date: Mon, 11 May 2026 16:16:18 -0700 Subject: [PATCH 36/42] expression: gate FTSMysqlMatchAgainst Flash pushdown on default modifier scalarExprSupportedByFlash unconditionally marked FTSMysqlMatchAgainst as Flash-pushdown-eligible, but the tipb protocol does not serialize the FTS modifier (see the explicit note in distsql_builtin.go): TiFlash reconstructs the signature with the default natural-language mode, so a Boolean-mode or WITH QUERY EXPANSION call pushed down would silently execute with the modifier dropped. The planner's modifier guard in matchAgainstToBuiltin already errors at plan time for non-default modifiers reaching a final-plan position, so the concrete user-facing bug isn't currently reachable. Add the local check here as defense in depth: scalarExprSupportedByFlash should be self-consistent so any future code path that builds an FTSMysqlMatchAgainst around the planner doesn't silently leak wrong semantics. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/expression/fts_to_like_test.go | 29 +++++++++++++++++++++++++++++ pkg/expression/infer_pushdown.go | 14 +++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/pkg/expression/fts_to_like_test.go b/pkg/expression/fts_to_like_test.go index eb11f6195c82d..746ec789d2eee 100644 --- a/pkg/expression/fts_to_like_test.go +++ b/pkg/expression/fts_to_like_test.go @@ -310,3 +310,32 @@ func TestBuildFTSToILikeExpressionFromBuiltin(t *testing.T) { require.Error(t, err) }) } + +func TestScalarExprSupportedByFlashRejectsNonDefaultFTSModifier(t *testing.T) { + // The tipb pushdown protocol does not serialize the FTS modifier; TiFlash + // reconstructs the signature with the default (natural-language) modifier. + // scalarExprSupportedByFlash must therefore mark non-default-modifier + // FTSMysqlMatchAgainst as NOT Flash-supported even though the function + // name is generally Flash-pushdown-eligible. This is defense in depth on + // top of the planner's modifier guard in matchAgainstToBuiltin. 
+ ctx := mock.NewContext() + naturalMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode) + booleanMode := ast.FulltextSearchModifier(ast.FulltextSearchModifierBooleanMode) + queryExpansion := ast.FulltextSearchModifier(ast.FulltextSearchModifierNaturalLanguageMode | ast.FulltextSearchModifierWithQueryExpansion) + + cases := []struct { + name string + modifier ast.FulltextSearchModifier + want bool + }{ + {"natural-language mode is Flash-supported", naturalMode, true}, + {"boolean mode is not Flash-supported", booleanMode, false}, + {"with-query-expansion is not Flash-supported", queryExpansion, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + sf := newFTSMatchAgainstForTest(t, ctx, "mysql", 1, tc.modifier) + require.Equal(t, tc.want, scalarExprSupportedByFlash(ctx.GetEvalCtx(), sf)) + }) + } +} diff --git a/pkg/expression/infer_pushdown.go b/pkg/expression/infer_pushdown.go index a1ca13fca32f4..2e510af3220d6 100644 --- a/pkg/expression/infer_pushdown.go +++ b/pkg/expression/infer_pushdown.go @@ -450,8 +450,20 @@ func scalarExprSupportedByFlash(ctx EvalContext, function *ScalarFunction) bool return true case ast.VecDims, ast.VecL1Distance, ast.VecL2Distance, ast.VecNegativeInnerProduct, ast.VecCosineDistance, ast.VecL2Norm, ast.VecAsText: return true - case ast.FTSMatchWord, ast.FTSMysqlMatchAgainst: + case ast.FTSMatchWord: return true + case ast.FTSMysqlMatchAgainst: + // The tipb pushdown protocol (see distsql_builtin.go) does not + // serialize the FTS modifier; TiFlash defaults to natural-language + // mode on the reconstructed signature. Pushing a Boolean-mode or + // WITH QUERY EXPANSION call down would therefore silently execute + // with the modifier dropped. 
Mark such calls as not Flash-supported + // here as a defense in depth — the planner's modifier guard in + // matchAgainstToBuiltin already rejects them at plan time, but + // keeping pushdown self-consistent guards against any future code + // path that builds an FTSMysqlMatchAgainst around the planner. + sig, ok := function.Function.(*builtinFtsMysqlMatchAgainstSig) + return ok && !sig.modifier.IsBooleanMode() && !sig.modifier.WithQueryExpansion() case ast.Grouping: // grouping function for grouping sets identification. return true } From 880361438dc277d03f72dfed5a6a010d893ef659 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 11 May 2026 16:20:40 -0700 Subject: [PATCH 37/42] cardinality: route constant FTS substitutes to the constants bucket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BuildFTSToILikeExpressionFromBuiltin returns a *Constant for AGAINST(NULL) and empty-string searches (both substitute to constant 0). The previous type-assertion only accepted *ScalarFunction, so those constant substitutes fell through and got the original opaque FTSMysqlMatchAgainst routed to the str-match bucket with the 0.1 default selectivity — 10x higher than the correct constant-false (0) estimate. That can flip join order and index-selection decisions in plans containing MATCH...AGAINST(NULL). Switch to a type-switch so a *Constant substitute lands in notCoveredConstants, where the existing constant-folding pass recognizes it as constant-false. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/planner/cardinality/selectivity.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go index bf746bde9f826..0dcb56608d4ae 100644 --- a/pkg/planner/cardinality/selectivity.go +++ b/pkg/planner/cardinality/selectivity.go @@ -267,8 +267,17 @@ func Selectivity( // receives. 
BuildFTSToILikeExpressionFromBuiltin returns an error // for the multi-column case to keep that path explicit here. if substitute, err := expression.BuildFTSToILikeExpressionFromBuiltin(ctx.GetExprCtx(), x); err == nil { - if subSF, ok := substitute.(*expression.ScalarFunction); ok { - notCoveredStrMatch[i] = subSF + switch sub := substitute.(type) { + case *expression.ScalarFunction: + notCoveredStrMatch[i] = sub + continue + case *expression.Constant: + // AGAINST(NULL) and empty-string search produce a + // constant substitute (Constant 0). Route to the + // constants bucket so the stats engine recognizes + // it as constant-false (selectivity 0) instead of + // applying the str-match default (0.1). + notCoveredConstants[i] = sub continue } } From b0b04c4a71909590cf9ae879d27af2068adf5178 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 11 May 2026 18:09:32 -0700 Subject: [PATCH 38/42] expression: emit Constant(NULL) for AGAINST(NULL) in selectivity substitute BuildFTSToILikeExpressionFromBuiltin previously short-circuited NULL search constants to Constant(IntDatum(0)), while the planner-side matchAgainstToLike NULL fast-path emits Constant(NULL). The selectivity engine routes both to notCoveredConstants today and the bare-FTS case produces the same selectivity 0 either way, so the asymmetry isn't a current correctness bug. But under SQL three-valued logic NOT(0)=1 and NOT(NULL)=NULL, so any future cost path that composes NOT over the substitute would report opposite selectivity from native MATCH(NULL). Align the two paths to Constant(NULL) for consistency and to future-proof the substitution. Update the matching unit test to assert Constant(NULL) via Datum.IsNull() rather than the previous ToInt64()==0 check. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/expression/fts_to_like.go | 8 +++++++- pkg/expression/fts_to_like_test.go | 11 +++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pkg/expression/fts_to_like.go b/pkg/expression/fts_to_like.go index 9fd62451212da..86254cd4e840b 100644 --- a/pkg/expression/fts_to_like.go +++ b/pkg/expression/fts_to_like.go @@ -371,8 +371,14 @@ func BuildFTSToILikeExpressionFromBuiltin(ctx BuildContext, fts *ScalarFunction) return nil, ErrNotSupportedYet.GenWithStackByArgs("MATCH...AGAINST with non-constant search string") } if againstConst.Value.IsNull() { + // Match the planner-side matchAgainstToLike NULL fast-path: emit + // Constant(NULL) so the substitute preserves SQL three-valued logic + // even though selectivity estimation does not currently exploit the + // difference. Constant(0) here would, under any future cost path that + // composes NOT over the substitute, report "NOT 0 = TRUE → selectivity + // 1" — opposite of native MATCH(NULL) which returns NULL. return &Constant{ - Value: types.NewIntDatum(0), + Value: types.Datum{}, RetType: types.NewFieldType(mysql.TypeTiny), }, nil } diff --git a/pkg/expression/fts_to_like_test.go b/pkg/expression/fts_to_like_test.go index 746ec789d2eee..4f0581698c0a7 100644 --- a/pkg/expression/fts_to_like_test.go +++ b/pkg/expression/fts_to_like_test.go @@ -281,10 +281,11 @@ func TestBuildFTSToILikeExpressionFromBuiltin(t *testing.T) { require.Contains(t, err.Error(), "multi-column") }) - t.Run("NULL search constant returns zero", func(t *testing.T) { + t.Run("NULL search constant returns Constant(NULL)", func(t *testing.T) { // The builtin's getFunction allows NULL search constants explicitly - // (builtin_fts.go:129); the substitution must also short-circuit to a - // constant-0 expression rather than attempting to validate/translate. 
+ // (builtin_fts.go:129); the substitution short-circuits to Constant(NULL) + // rather than Constant(0) so it composes correctly under SQL three-valued + // logic and matches the planner-side matchAgainstToLike NULL fast-path. stringTp := types.NewFieldType(mysql.TypeVarchar) nullArg := &Constant{Value: types.NewDatum(nil), RetType: stringTp} col := &Column{Index: 0, RetType: stringTp} @@ -297,9 +298,7 @@ func TestBuildFTSToILikeExpressionFromBuiltin(t *testing.T) { require.NoError(t, err) c, ok := expr.(*Constant) require.True(t, ok) - v, err := c.Value.ToInt64(types.DefaultStmtNoWarningContext) - require.NoError(t, err) - require.EqualValues(t, 0, v) + require.True(t, c.Value.IsNull(), "expected Constant(NULL), got %v", c.Value) }) t.Run("search string outside strict subset rejected", func(t *testing.T) { From aba6e184d0152e2e490ff4df4b9c49d8e9dba073 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 11 May 2026 18:22:00 -0700 Subject: [PATCH 39/42] planner: move column-type check above NULL fast-path in LIKE fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LIKE fallback's NULL fast-path emitted Constant(NULL) before validating that the matched columns are string-typed. Round 1's matchAgainstToBuiltin (via getFunction in builtin_fts.go) already rejects non-string columns before round 2's matchAgainstToLike can run, so the gap was unreachable in current architecture — but the LIKE fallback should be self-consistent regardless of how it gets called: any future code path that reaches matchAgainstToLike directly with a non-string column and a NULL search would otherwise silently emit Constant(NULL) instead of erroring with the column-type message. Reorder so the column-type check runs first, before both the NULL fast-path and the strict-subset validator. Adds an integration regression (MATCH(id) AGAINST(NULL)) confirming the column-type error wins. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/planner/core/expression_rewriter.go | 33 ++++++++++--------- .../r/planner/core/fulltext_search.result | 2 ++ .../t/planner/core/fulltext_search.test | 7 ++++ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index c899680d227ac..3137969483357 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2635,6 +2635,24 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s er.sctx.SetSkipPlanCache("MATCH...AGAINST LIKE fallback bakes a mutable search string into plan constants") } + // Reject non-string matched columns before any value-based branch so the + // column-type error always wins. In current architecture round 1's + // matchAgainstToBuiltin → getFunction (builtin_fts.go) already rejects + // non-string columns before round 2 (this function) can run, but keep + // the check here too as defense in depth: the LIKE fallback's own NULL + // fast-path and strict-subset validator below should never accept a + // non-string column, regardless of any future code path that might + // reach this function around round 1. + columns := make([]expression.Expression, numCols) + for i := range numCols { + col := er.ctxStack[stackLen-numCols-1+i] + if col.GetType(er.sctx.GetEvalCtx()).EvalType() != types.ETString { + er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") + return + } + columns[i] = col + } + searchText, err := constExpr.Eval(er.sctx.GetEvalCtx(), chunk.Row{}) if err != nil { er.err = err @@ -2662,21 +2680,6 @@ func (er *expressionRewriter) matchAgainstToLike(v *ast.MatchAgainst, numCols, s return } - // Reject non-string matched columns before any other LIKE-specific checks - // so the column-type error always wins. 
If we ran the strict-subset - // validator first, a query like MATCH(int_col) AGAINST('a-b') would - // surface "search term 'a-b' is not supported" — accurate but less - // actionable than "non-string column". - columns := make([]expression.Expression, numCols) - for i := range numCols { - col := er.ctxStack[stackLen-numCols-1+i] - if col.GetType(er.sctx.GetEvalCtx()).EvalType() != types.ETString { - er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("Doesn't support match search on a non-string column without fulltext index") - return - } - columns[i] = col - } - // The LIKE fallback only translates a strict subset of MySQL FTS search // strings (alphanumeric words, optionally prefixed with + or - in boolean // mode). Anything outside that subset would tokenize differently in MySQL diff --git a/tests/integrationtest/r/planner/core/fulltext_search.result b/tests/integrationtest/r/planner/core/fulltext_search.result index f90473fdad47a..30e7acaf517ea 100644 --- a/tests/integrationtest/r/planner/core/fulltext_search.result +++ b/tests/integrationtest/r/planner/core/fulltext_search.result @@ -142,6 +142,8 @@ select id from articles where match(id) against('MySQL'); Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' select id from articles where match(id) against('xx-yy'); Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' +select id from articles where match(id) against(NULL); +Error 1235 (42000): This version of TiDB doesn't yet support 'Doesn't support match search on a non-string column without fulltext index' select id, title from articles where match(title) against('MySQL') and match(body) against('PostgreSQL'); id title diff --git a/tests/integrationtest/t/planner/core/fulltext_search.test b/tests/integrationtest/t/planner/core/fulltext_search.test index a6a1c160f8109..f1b64c52ce563 
100644 --- a/tests/integrationtest/t/planner/core/fulltext_search.test +++ b/tests/integrationtest/t/planner/core/fulltext_search.test @@ -188,6 +188,13 @@ select id from articles where match(id) against('MySQL'); -- error 1235 select id from articles where match(id) against('xx-yy'); +# Test 36a-null: Non-string MATCH column with a NULL search argument. The +# column-type error must still win — the LIKE fallback's NULL fast-path +# must not silently emit Constant(NULL) when the matched column wouldn't +# be eligible in the first place. +-- error 1235 +select id from articles where match(id) against(NULL); + # Test 36b: Multi-MATCH in the same predicate with no FTS index on either # column. Round 1 sees both MATCHes as non-viable and the fts-like-fallback # alternative round must take over; if round 1's native plan were not From 57f98c4c03e24b5113183602f6b69b73dc004d6d Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 11 May 2026 18:31:27 -0700 Subject: [PATCH 40/42] cardinality: refresh stale Constant(0) comment after Constant(NULL) change The substitution comment said "AGAINST(NULL) and empty-string search produce a constant substitute (Constant 0)" but commit b0b04c4a71 changed BuildFTSToILikeExpressionFromBuiltin to emit Constant(NULL) for AGAINST(NULL) (empty-string search still uses Constant(0) via ftsZeroIntConst). Update the comment to spell out which shape comes from which case and to note that the constant-folding pass below handles both shapes equivalently via its IsNull and ToBool branches. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/planner/cardinality/selectivity.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go index 0dcb56608d4ae..c70f388c201e0 100644 --- a/pkg/planner/cardinality/selectivity.go +++ b/pkg/planner/cardinality/selectivity.go @@ -272,11 +272,15 @@ func Selectivity( notCoveredStrMatch[i] = sub continue case *expression.Constant: - // AGAINST(NULL) and empty-string search produce a - // constant substitute (Constant 0). Route to the - // constants bucket so the stats engine recognizes - // it as constant-false (selectivity 0) instead of - // applying the str-match default (0.1). + // AGAINST(NULL) produces Constant(NULL) (preserves SQL + // three-valued logic — matches the planner-side + // matchAgainstToLike NULL fast-path); empty-string + // search produces Constant(0). Route either to the + // constants bucket so the stats engine recognizes the + // substitute as constant-false (the IsNull / ToBool + // pass at line ~309 zeroes selectivity for both + // shapes) instead of applying the str-match default + // (0.1). notCoveredConstants[i] = sub continue } From a18872ceaa6500c10c1b63c5d4ccf5cb163b499e Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 11 May 2026 18:36:29 -0700 Subject: [PATCH 41/42] expression: add defensive bounds check before indexing FTS validator token ValidateFTSSearchStringForLikeFallback indexed body[0] inside the boolean- mode operator-strip branch without first checking len(body). The indexing is safe today because strings.Fields never returns an empty token, but the guard makes the bound explicit at the call site and protects against any future change to the tokenization that could produce an empty substring. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/expression/fts_to_like.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/expression/fts_to_like.go b/pkg/expression/fts_to_like.go index 86254cd4e840b..19e46dbbbe075 100644 --- a/pkg/expression/fts_to_like.go +++ b/pkg/expression/fts_to_like.go @@ -120,7 +120,11 @@ func ValidateFTSSearchStringForLikeFallback(searchText string, modifier ast.Full isBoolean := modifier.IsBooleanMode() for _, token := range strings.Fields(searchText) { body := token - if isBoolean && (body[0] == '+' || body[0] == '-') { + // strings.Fields never returns an empty token (consecutive whitespace + // is collapsed), so body[0] is safe today. Keep the len(body) > 0 + // guard explicit so the indexing is obviously bounded and the check + // stays correct if the tokenization ever changes. + if isBoolean && len(body) > 0 && (body[0] == '+' || body[0] == '-') { body = body[1:] } if body == "" { From ed3e7a3e8a74bebff5e7446938e0152f376c100b Mon Sep 17 00:00:00 2001 From: tpp Date: Tue, 12 May 2026 18:09:37 -0700 Subject: [PATCH 42/42] planner: restore FTS cost competition between native and ILIKE plans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 1 already runs with the native FTSMysqlMatchAgainst builtin; the fts-like-fallback alternative round previously fired only when round 1's plan was non-executable. Queries where both plans were valid (alphanumeric word search in a direct-boolean predicate position) had no competitor — the native plan won by default even when an ILIKE plan would have been cheaper given a selective non-FTS predicate. Split the planBuilder signal into two: - HasNonViableFTSMatch (existing): round 1's plan cannot execute; driver discards it and forces FTSLikeFallback across all subsequent rounds. - HasPredicateMatch (new): round 1 saw a direct-boolean-context MATCH. 
Driver propagates this into stmtctx as AlternativeLogicalPlanHasPredicateContextMatch, enabling the fts-like-fallback round for cost competition. The fts-like-fallback round now: - enables on (FTSLikeFallback || HasPredicateContextMatch), - via setup/cleanup saves and forces AlternativeLogicalPlanFTSLikeFallback true during its own build so the rewriter emits ILIKE, - restores the flag after the round. Behavior matrix: | native viable | predicate MATCH | LIKE round | outcome | | yes | yes | yes | strict-< cost compare | | yes | no (scoring) | no | native wins | | no | yes | yes | LIKE wins (discard) | | no | yes, LIKE bad | yes(error) | surface LIKE error | The existing fulltext_search integration suite (95 cases, all on a TiKV-only / no-FTS-index environment) exercises only the discard path and continues to pass unchanged. The new cost-competition branch requires a TiFlash replica + public FTS index in test fixtures and is not exercised by the current integration test environment. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/planner/core/expression_rewriter.go | 20 ++++--- pkg/planner/core/planbuilder.go | 21 ++++++++ pkg/planner/optimize.go | 69 ++++++++++++++++++------- pkg/sessionctx/stmtctx/stmtctx.go | 18 +++++-- 4 files changed, 98 insertions(+), 30 deletions(-) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 3137969483357..39ccf9a5696fc 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -2459,6 +2459,9 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { // any boolean-context MATCH is non-viable, the resulting plan would // fail at execution. The rewriter records that on the planBuilder so the // round driver can invalidate the plan and trigger the fallback round. 
+ // Round 1 additionally records that a direct-boolean-context MATCH was + // seen so the driver runs the LIKE round for cost competition even when + // round 1's native plan is executable. useLikeFallback := false if er.planCtx != nil && er.planCtx.builder != nil && er.planCtx.builder.ctx != nil { sessVars := er.planCtx.builder.ctx.GetSessionVars() @@ -2466,13 +2469,16 @@ func (er *expressionRewriter) matchAgainstToExpression(v *ast.MatchAgainst) { if sessVars.StmtCtx.AlternativeLogicalPlanFTSLikeFallback { // fts-like-fallback round: boolean-context MATCH rewrites to ILIKE. useLikeFallback = true - } else if sessVars.EnableAlternativeLogicalPlans && !er.ftsNativeViable(v.Modifier, numCols, stackLen) { - // Round 1 (native) but this boolean-context MATCH cannot run - // natively. Mark the build so the driver invalidates this plan and - // triggers fts-like-fallback. The rewrite continues with the native - // builtin to keep round 1 internally consistent; that plan is - // discarded after build completes. - er.planCtx.builder.MarkNonViableFTSMatch() + } else if sessVars.EnableAlternativeLogicalPlans { + // Round 1 (native). Mark the build so the driver runs the LIKE + // round and cost-compares its plan against round 1's. If this + // MATCH cannot run natively, also mark the build as non-viable + // so the driver discards round 1's plan; the rewrite continues + // with the native builtin to keep round 1 internally consistent. + er.planCtx.builder.MarkPredicateMatch() + if !er.ftsNativeViable(v.Modifier, numCols, stackLen) { + er.planCtx.builder.MarkNonViableFTSMatch() + } } } } diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index a01e43fb90699..ceab10b9bd362 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -336,6 +336,13 @@ type PlanBuilder struct { // after the round to invalidate the round's plan and trigger the // fts-like-fallback round (see optimize.go). 
nonViableFTSMatch bool + + // predicateMatchSeen is set during build when the expression rewriter + // encounters a direct-boolean-context MATCH...AGAINST (one whose 0/1 boolean + // result is consumed directly as a predicate). The alternative-rounds driver + // uses this to enable the fts-like-fallback round even when round 1's + // native plan is executable, so the LIKE-based plan can compete on cost. + predicateMatchSeen bool } // HasNonViableFTSMatch reports whether the most recent build round saw a @@ -352,6 +359,20 @@ func (b *PlanBuilder) MarkNonViableFTSMatch() { b.nonViableFTSMatch = true } +// HasPredicateMatch reports whether the most recent build round saw a +// direct-boolean-context MATCH...AGAINST. The caller (optimize.go) uses this +// to decide whether to run the fts-like-fallback round for cost competition, +// independent of whether round 1's native plan is executable. +func (b *PlanBuilder) HasPredicateMatch() bool { + return b.predicateMatchSeen +} + +// MarkPredicateMatch records that the current build encountered a +// direct-boolean-context MATCH...AGAINST. See HasPredicateMatch. +func (b *PlanBuilder) MarkPredicateMatch() { + b.predicateMatchSeen = true +} + type handleColHelper struct { id2HandleMapStack []map[int64][]util.HandleCols stackTail int diff --git a/pkg/planner/optimize.go b/pkg/planner/optimize.go index 93ac674631b2d..0dc994d997aef 100644 --- a/pkg/planner/optimize.go +++ b/pkg/planner/optimize.go @@ -561,12 +561,21 @@ func buildAndOptimizeLogicalPlanRound( return nil, nil, false, err } + // Record predicate-context MATCH for cost competition. The fts-like-fallback + // alternative round reads this signal to decide whether to build a competing + // ILIKE-based plan alongside round 1's native plan, so the cheaper of the + // two wins via the normal alt-rounds cost comparison. 
+ if builder.HasPredicateMatch() { + sctx.GetSessionVars().StmtCtx.AlternativeLogicalPlanHasPredicateContextMatch = true + } + // If this round saw a predicate-context MATCH that cannot be served by the // native FTSMysqlMatchAgainst builtin, the produced plan would fail at // execution. Discard it and arm AlternativeLogicalPlanFTSLikeFallback so - // the fts-like-fallback alternative round rebuilds the plan with ILIKE. - // The flag also persists across any subsequent rounds (correlate, etc.) - // so their re-rewrites use ILIKE for predicate MATCHes too. + // any intervening rounds (correlate, etc.) re-rewrite with ILIKE too. The + // fts-like-fallback round below also forces this flag during setup; this + // outer assignment covers the non-viable case where the flag must stay + // true across all subsequent rounds, not just inside the LIKE round. if builder.HasNonViableFTSMatch() { sctx.GetSessionVars().StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true return p, names, false, nil @@ -619,6 +628,12 @@ type alternativeRound struct { // wrapper. Safe because optimize is single-threaded per session. var savedEnableCorrelateSubquery bool +// savedFTSLikeFallback holds the pre-round value of +// AlternativeLogicalPlanFTSLikeFallback so the fts-like-fallback round's +// setup/cleanup can restore it after running with the flag forced on. Safe +// because optimize is single-threaded per session (NOTE(review): package-level var is shared across sessions; confirm). +var savedFTSLikeFallback bool + var alternativeRounds = [...]alternativeRound{ { name: "non-decorrelate", @@ -644,18 +659,30 @@ var alternativeRounds = [...]alternativeRound{ }, { // fts-like-fallback: rebuild the plan rewriting predicate-context - // MATCH...AGAINST to ILIKE so the query can execute when the native - // FTSMysqlMatchAgainst builtin can't be served. 
Round 1 always uses - // the native builtin (same as Alt-disabled); the round driver sets - // AlternativeLogicalPlanFTSLikeFallback and invalidates round 1's plan - // only when round 1 saw a predicate-context MATCH whose columns lack - // a public FULLTEXT index on a TiFlash replica (or whose modifier is - // not pushdown-supported). When this round fires it is the only valid - // plan; round 1's plan was discarded. + // MATCH...AGAINST to ILIKE so it can compete with round 1's native plan + // on cost (and serve as the only valid plan when native is non-viable). + // Round 1 always uses the native builtin (same as Alt-disabled). This + // round fires whenever round 1 saw a direct-boolean-context MATCH + // (HasPredicateContextMatch) — both plans then compete via the strict-`<` + // cost comparison in buildAndOptimizeLogicalPlanRound — or whenever + // round 1 saw a MATCH whose native form cannot execute + // (FTSLikeFallback, set by the round driver after discarding round 1). + // In the discard case, round 1's plan is unavailable and this round's + // plan wins by default. 
name: "fts-like-fallback", enabled: func(sv *variable.SessionVars) bool { - return sv.EnableAlternativeLogicalPlans && - sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback + if !sv.EnableAlternativeLogicalPlans { + return false + } + return sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback || + sv.StmtCtx.AlternativeLogicalPlanHasPredicateContextMatch + }, + setup: func(sv *variable.SessionVars) { + savedFTSLikeFallback = sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = true + }, + cleanup: func(sv *variable.SessionVars) { + sv.StmtCtx.AlternativeLogicalPlanFTSLikeFallback = savedFTSLikeFallback }, }, } @@ -704,11 +731,17 @@ func optimize(ctx context.Context, sctx planctx.PlanContext, node *resolve.NodeW initialLogicalPlanCtx = saveLogicalPlanBuildCtx(sessVars) sessVars.StmtCtx.ResetAlternativeLogicalPlanSignals() // Round 1 always uses the native FTSMysqlMatchAgainst builtin — same as - // the Alt-disabled default. If the build records a non-viable predicate - // MATCH on the planBuilder (no FTS index / no TiFlash replica), the - // round driver discards round 1's plan and sets - // AlternativeLogicalPlanFTSLikeFallback to trigger the fts-like-fallback - // alternative round, which re-builds using ILIKE for predicate MATCHes. + // the Alt-disabled default. The build records two signals on the + // planBuilder when MATCH...AGAINST is seen: + // * HasPredicateMatch: any direct-boolean-context MATCH. The round + // driver propagates this into stmtctx to trigger the + // fts-like-fallback alternative round, which builds a competing + // ILIKE-based plan; the cheaper of the two wins. + // * HasNonViableFTSMatch: a predicate-context MATCH whose native form + // cannot execute (no FTS index / no TiFlash replica / unsupported + // modifier). The round driver discards round 1's plan and forces + // AlternativeLogicalPlanFTSLikeFallback true so all subsequent + // rounds (correlate, etc.) re-rewrite with ILIKE. 
} p, names, nonLogical, err := buildAndOptimizeLogicalPlanRound( diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index 972da8658b513..3b35aa76e9d47 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -492,14 +492,21 @@ type StatementContext struct { // the native FTSMysqlMatchAgainst builtin. When true, the rewriter emits // ILIKE-based predicates instead. // - // Round 1 always runs with this flag false. If the build phase finds any + // Round 1 always runs with this flag false. The "fts-like-fallback" + // alternative round flips it to true (via its setup/cleanup) while it + // builds a competing ILIKE-based plan; the cost-cheapest plan wins via the + // normal alt-rounds cost comparison. If round 1's build records a // predicate-context MATCH that cannot be served natively (no FTS index on a // matched column / no TiFlash replica / modifier not pushdown-supported), - // optimize.go invalidates the round-1 plan and sets this flag so the - // "fts-like-fallback" alternative round fires with the rewriter switched - // to ILIKE. The flag survives subsequent rounds so any further re-rewrite - // (correlate, etc.) keeps using ILIKE for the affected MATCHes. + // optimize.go additionally invalidates round 1's plan and forces this flag + // true outside the round so any intervening rounds (correlate, etc.) also + // produce executable LIKE-based plans. AlternativeLogicalPlanFTSLikeFallback bool + // AlternativeLogicalPlanHasPredicateContextMatch indicates that round 1 + // encountered a direct-boolean-context MATCH...AGAINST. The round driver + // uses this to enable the fts-like-fallback round for cost competition even + // when round 1's native plan is executable. + AlternativeLogicalPlanHasPredicateContextMatch bool // IsExplainAnalyzeDML is true if the statement is "explain analyze DML executors", before responding the explain // results to the client, the transaction should be committed first. 
See issue #37373 for more details. @@ -680,6 +687,7 @@ func (sc *StatementContext) ResetAlternativeLogicalPlanSignals() { sc.AlternativeLogicalPlanSameOrderIndexJoin = false sc.AlternativeLogicalPlanOrderAwareJoinReorder = false sc.AlternativeLogicalPlanFTSLikeFallback = false + sc.AlternativeLogicalPlanHasPredicateContextMatch = false sc.AlternativeLogicalPlanPreferCorrelate = false }