diff --git a/br/tests/br_partial_index/run.sh b/br/tests/br_partial_index/run.sh new file mode 100644 index 0000000000000..44014f1d74226 --- /dev/null +++ b/br/tests/br_partial_index/run.sh @@ -0,0 +1,57 @@ +#!/bin/sh +# +# Copyright 2025 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu +DB="$TEST_NAME" + +run_sql "CREATE DATABASE $DB;" + +run_sql " +USE $DB; + +CREATE TABLE t0 ( + id int primary key, + col1 int, + col2 int, + key idx_col1 (col1) where col2 > 10 +); +INSERT INTO t0 VALUES (1, 1, 1); +INSERT INTO t0 VALUES (2, 2, 15); +INSERT INTO t0 VALUES (3, 3, 1); +INSERT INTO t0 VALUES (4, 4, 20); +INSERT INTO t0 VALUES (5, 5, 1); +" + +# backup table +echo "backup start..." +run_br --pd $PD_ADDR backup db -s "local://$TEST_DIR/$DB" --db $DB + +run_sql "DROP DATABASE $DB;" +run_sql "CREATE DATABASE $DB;" + +# restore table +echo "restore start..." 
+run_br restore db --db $DB -s "local://$TEST_DIR/$DB" --pd $PD_ADDR + +if run_sql "admin check table ${DB}.t0;" | grep -q 'inconsistency'; then + echo "TEST: [$TEST_NAME] failed after restoring $DB.t0" + exit 1 +fi + +run_sql "show create table $DB.t0;" +check_contains "WHERE \`col2\` > 10" + +run_sql "DROP DATABASE $DB;" diff --git a/br/tests/run_group_br_tests.sh b/br/tests/run_group_br_tests.sh index f8beea42d4a32..aba721ae477a9 100755 --- a/br/tests/run_group_br_tests.sh +++ b/br/tests/run_group_br_tests.sh @@ -27,8 +27,13 @@ groups=( ["G04"]='br_range br_replica_read br_restore_TDE_enable br_restore_log_task_enable br_s3 br_shuffle_leader br_shuffle_region br_single_table br_region_rule br_merge_option_attributes' ["G05"]='br_skip_checksum br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter br_partition_add_index br_pitr_log_restore_backup_compatibility' ["G06"]='br_tikv_outage br_tikv_outage3 br_restore_checkpoint br_encryption br_pitr_online_table_filter' +<<<<<<< HEAD ["G07"]='br_pitr br_restore_physical br_blocklist' ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom br_file_corruption br_tiflash_conflict br_pitr_table_filter' +======= + ["G07"]='br_pitr br_restore_physical' + ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom br_file_corruption br_tiflash_conflict br_pitr_table_filter br_partial_index' +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) ) # Get other cases not in groups, to avoid missing any case diff --git a/errors.toml b/errors.toml index 4c87fdd30c9b3..6085b8e153e69 100644 --- a/errors.toml +++ b/errors.toml @@ -1601,6 +1601,11 @@ error = ''' Invalid AFFINITY %s ''' +["ddl:8272"] +error = ''' +Cannot drop, change or modify column '%s': it is referenced in partial index '%s' +''' + ["ddl:9014"] error = ''' TiFlash backfill index failed: %s diff --git 
a/pkg/ddl/backfilling_operators.go b/pkg/ddl/backfilling_operators.go index 2bbdb29ecb682..aadb21751731a 100644 --- a/pkg/ddl/backfilling_operators.go +++ b/pkg/ddl/backfilling_operators.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/tidb/pkg/ddl/copr" "github.com/pingcap/tidb/pkg/ddl/ingest" "github.com/pingcap/tidb/pkg/ddl/session" + distsqlctx "github.com/pingcap/tidb/pkg/distsql/context" "github.com/pingcap/tidb/pkg/disttask/framework/proto" "github.com/pingcap/tidb/pkg/disttask/operator" "github.com/pingcap/tidb/pkg/kv" @@ -48,6 +49,7 @@ import ( "github.com/pingcap/tidb/pkg/table/tables" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util/chunk" + contextutil "github.com/pingcap/tidb/pkg/util/context" "github.com/pingcap/tidb/pkg/util/dbterror" "github.com/pingcap/tidb/pkg/util/intest" "github.com/pingcap/tidb/pkg/util/logutil" @@ -125,11 +127,14 @@ func NewAddIndexIngestPipeline( ) (*operator.AsyncPipeline, error) { indexes := make([]table.Index, 0, len(idxInfos)) for _, idxInfo := range idxInfos { - index := tables.NewIndex(tbl.GetPhysicalID(), tbl.Meta(), idxInfo) + index, err := tables.NewIndex(tbl.GetPhysicalID(), tbl.Meta(), idxInfo) + if err != nil { + return nil, err + } indexes = append(indexes, index) } reqSrc := getDDLRequestSource(model.ActionAddIndex) - copCtx, err := NewReorgCopContext(store, reorgMeta, tbl.Meta(), idxInfos, reqSrc) + copCtx, err := NewReorgCopContext(reorgMeta, tbl.Meta(), idxInfos, reqSrc) if err != nil { return nil, err } @@ -183,11 +188,14 @@ func NewWriteIndexToExternalStoragePipeline( ) (*operator.AsyncPipeline, error) { indexes := make([]table.Index, 0, len(idxInfos)) for _, idxInfo := range idxInfos { - index := tables.NewIndex(tbl.GetPhysicalID(), tbl.Meta(), idxInfo) + index, err := tables.NewIndex(tbl.GetPhysicalID(), tbl.Meta(), idxInfo) + if err != nil { + return nil, err + } indexes = append(indexes, index) } reqSrc := getDDLRequestSource(model.ActionAddIndex) - copCtx, err := 
NewReorgCopContext(store, reorgMeta, tbl.Meta(), idxInfos, reqSrc) + copCtx, err := NewReorgCopContext(reorgMeta, tbl.Meta(), idxInfos, reqSrc) if err != nil { return nil, err } @@ -272,6 +280,16 @@ type IndexRecordChunk struct { Chunk *chunk.Chunk Err error Done bool +<<<<<<< HEAD +======= + ctx *OperatorCtx + // tableScanRowCount is the number of rows scanned by the corresponding TableScanTask. + // If the index is a partial index, the number of rows in the Chunk may be less than tableScanRowCount. + tableScanRowCount int64 + // conditionPushed records whether the index condition has been pushed down. If it's true, the ingest worker + // can skip running the checker in TiDB side. + conditionPushed bool +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) } // RecoverArgs implements workerpool.TaskMayPanic interface. @@ -451,6 +469,8 @@ func NewTableScanOperator( reorgMeta *model.DDLReorgMeta, cpOp ingest.CheckpointOperator, ) *TableScanOperator { + intest.AssertNotNil(reorgMeta) + totalCount := new(atomic.Int64) pool := workerpool.NewWorkerPool( "TableScanOperator", @@ -521,7 +541,20 @@ func (w *tableScanWorker) Close() error { return nil } +<<<<<<< HEAD func (w *tableScanWorker) scanRecords(task TableScanTask, sender func(IndexRecordChunk)) error { +======= +func (w *tableScanWorker) newDistSQLCtx() (*distsqlctx.DistSQLContext, error) { + warnHandler := contextutil.NewStaticWarnHandler(0) + return newReorgDistSQLCtxWithReorgMeta( + w.se.GetClient(), + w.reorgMeta, + warnHandler, + ) +} + +func (w *tableScanWorker) scanRecords(task TableScanTask, sender func(IndexRecordChunk)) { +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) logutil.Logger(w.ctx).Info("start a table scan task", zap.Int("id", task.ID), zap.Stringer("task", task)) @@ -531,7 +564,22 @@ func (w *tableScanWorker) scanRecords(task TableScanTask, sender func(IndexRecor 
failpoint.Return(errors.New("mock scan record error")) }) failpoint.InjectCall("scanRecordExec", w.reorgMeta) +<<<<<<< HEAD rs, err := buildTableScan(w.ctx, w.copCtx.GetBase(), startTS, task.Start, task.End) +======= + selExpr, err := w.copCtx.GetCondition() + if err != nil { + return err + } + + // create a new distsqlCtx for each task because the `distsqlCtx` contains `RuntimeStatsColl`, which + // will be modified during the execution. + distsqlCtx, err := w.newDistSQLCtx() + if err != nil { + return err + } + rs, conditionPushed, err := buildTableScan(scanCtx, w.copCtx.GetBase(), distsqlCtx, startTS, task.Start, task.End, selExpr) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) if err != nil { return err } @@ -539,6 +587,7 @@ func (w *tableScanWorker) scanRecords(task TableScanTask, sender func(IndexRecor w.cpOp.AddChunk(task.ID, task.End) } var done bool + var lastTableScanRowCount int64 for !done { srcChk := w.getChunk() done, err = fetchTableScanResult(w.ctx, w.copCtx.GetBase(), rs, srcChk) @@ -547,7 +596,16 @@ func (w *tableScanWorker) scanRecords(task TableScanTask, sender func(IndexRecor terror.Call(rs.Close) return err } +<<<<<<< HEAD idxResults = append(idxResults, IndexRecordChunk{ID: task.ID, Chunk: srcChk, Done: done}) +======= + w.collector.Accepted(execDetails.UnpackedBytesReceivedKVTotal) + execDetails = kvutil.ExecDetails{} + + _, tableScanRowCount := distsqlCtx.RuntimeStatsColl.GetCopCountAndRows(tableScanCopID) + idxResults = append(idxResults, IndexRecordChunk{ID: task.ID, Chunk: srcChk, Done: done, ctx: w.ctx, tableScanRowCount: tableScanRowCount - lastTableScanRowCount, conditionPushed: conditionPushed}) + lastTableScanRowCount = tableScanRowCount +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) } return rs.Close() }) @@ -557,12 +615,11 @@ func (w *tableScanWorker) scanRecords(task TableScanTask, sender func(IndexRecor for i, 
idxResult := range idxResults { sender(idxResult) - rowCnt := idxResult.Chunk.NumRows() if w.cpOp != nil { done := i == len(idxResults)-1 - w.cpOp.UpdateChunk(task.ID, rowCnt, done) + w.cpOp.UpdateChunk(task.ID, int(idxResult.tableScanRowCount), done) } - w.totalCount.Add(int64(rowCnt)) + w.totalCount.Add(idxResult.tableScanRowCount) } return nil @@ -635,6 +692,7 @@ func NewWriteExternalStoreOperator( writers = append(writers, writer) } +<<<<<<< HEAD return &indexIngestExternalWorker{ indexIngestBaseWorker: indexIngestBaseWorker{ ctx: ctx, @@ -649,7 +707,27 @@ func NewWriteExternalStoreOperator( totalCount: totalCount, }, rowCntListener: rowCntListener, +======= + w := &indexIngestWorker{ + ctx: ctx, + tbl: tbl, + indexes: indexes, + copCtx: copCtx, + se: nil, + sessPool: sessPool, + writers: writers, + srcChunkPool: srcChunkPool, + reorgMeta: reorgMeta, + totalCount: totalCount, +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) + } + err := w.initIndexConditionCheckers() + if err != nil { + w.ctx.onError(err) + return nil } + + return w }) return &WriteExternalStoreOperator{ AsyncOperator: operator.NewAsyncOperator(ctx, pool), @@ -711,12 +789,20 @@ func NewIndexIngestOperator( writers = append(writers, writer) } +<<<<<<< HEAD return &indexIngestLocalWorker{ indexIngestBaseWorker: indexIngestBaseWorker{ ctx: ctx, tbl: tbl, indexes: indexes, copCtx: copCtx, +======= + w := &indexIngestWorker{ + ctx: ctx, + tbl: tbl, + indexes: indexes, + copCtx: copCtx, +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) se: nil, sessPool: sessPool, @@ -727,6 +813,13 @@ func NewIndexIngestOperator( backendCtx: backendCtx, rowCntListener: rowCntListener, } + err := w.initIndexConditionCheckers() + if err != nil { + w.ctx.onError(err) + return nil + } + + return w }) return &IndexIngestOperator{ AsyncOperator: operator.NewAsyncOperator[IndexRecordChunk, IndexWriteResult](ctx, pool), 
@@ -785,9 +878,10 @@ func (w *indexIngestLocalWorker) HandleTask(ck IndexRecordChunk, send func(Index type indexIngestBaseWorker struct { ctx *workerpool.Context - tbl table.PhysicalTable - indexes []table.Index - reorgMeta *model.DDLReorgMeta + tbl table.PhysicalTable + indexes []table.Index + reorgMeta *model.DDLReorgMeta + indexConditionCheckers []func(row chunk.Row) (bool, error) copCtx copr.CopContext sessPool opSessPool @@ -806,21 +900,39 @@ func (w *indexIngestBaseWorker) HandleTask(rs IndexRecordChunk) (IndexWriteResul result := IndexWriteResult{ ID: rs.ID, } +<<<<<<< HEAD if err := w.initSessCtx(); err != nil { return result, err } count, _, err := w.WriteChunk(&rs) +======= + w.initSessCtx() + // TODO: find a place to display the added count + _, bytes, err := w.WriteChunk(&ck) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) if err != nil { return result, err } +<<<<<<< HEAD if count == 0 { logutil.Logger(w.ctx).Info("finish a index ingest task", zap.Int("id", rs.ID)) return result, nil +======= + scannedCount := ck.tableScanRowCount + if scannedCount == 0 { + logutil.Logger(w.ctx).Info("finish a index ingest task", zap.Int("id", ck.ID)) + return +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) } if w.totalCount != nil { - w.totalCount.Add(int64(count)) + w.totalCount.Add(scannedCount) } +<<<<<<< HEAD result.Added = count +======= + result.RowCnt = int(ck.tableScanRowCount) + result.Bytes = bytes +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) if ResultCounterForTest != nil { ResultCounterForTest.Add(1) } @@ -843,7 +955,30 @@ func (w *indexIngestBaseWorker) initSessCtx() error { return nil } +<<<<<<< HEAD func (w *indexIngestBaseWorker) Close() error { +======= +func (w *indexIngestWorker) initIndexConditionCheckers() error { + if w.indexConditionCheckers != nil { + return nil + } + + 
w.indexConditionCheckers = make([]func(row chunk.Row) (bool, error), len(w.indexes)) + var err error + for i, index := range w.indexes { + if index.Meta().HasCondition() { + w.indexConditionCheckers[i], err = buildIndexConditionChecker(w.copCtx, w.tbl.Meta(), index.Meta()) + if err != nil { + return err + } + } + } + + return nil +} + +func (w *indexIngestWorker) Close() { +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) // TODO(lance6716): unify the real write action for engineInfo and external // writer. var gerr error @@ -874,7 +1009,18 @@ func (w *indexIngestBaseWorker) WriteChunk(rs *IndexRecordChunk) (count int, nex oprStartTime := time.Now() vars := w.se.GetSessionVars() sc := vars.StmtCtx +<<<<<<< HEAD cnt, lastHandle, err := writeChunk(w.ctx, w.writers, w.indexes, w.copCtx, sc.TimeZone(), sc.ErrCtx(), vars.GetWriteStmtBufs(), rs.Chunk, w.tbl.Meta()) +======= + + indexConditionCheckers := w.indexConditionCheckers + if rs.conditionPushed && len(w.indexes) == 1 { + // If the index condition has been pushed down to tikv side, and there's only one index, we can + // skip running the checker in TiDB side. 
+ indexConditionCheckers = nil + } + cnt, kvBytes, err := writeChunk(w.ctx, w.writers, w.indexes, indexConditionCheckers, w.copCtx, sc.TimeZone(), sc.ErrCtx(), vars.GetWriteStmtBufs(), rs.Chunk, w.tbl.Meta()) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) if err != nil || cnt == 0 { return 0, nil, err } diff --git a/pkg/ddl/backfilling_test.go b/pkg/ddl/backfilling_test.go index 83b66a01b6885..259269622d3b9 100644 --- a/pkg/ddl/backfilling_test.go +++ b/pkg/ddl/backfilling_test.go @@ -362,53 +362,6 @@ func assertDistSQLCtxEqual(t *testing.T, expected *distsqlctx.DistSQLContext, ac require.Equal(t, errctx.NewContextWithLevels(expected.ErrCtx.LevelMap(), expected.WarnHandler), actual.ErrCtx) } -// TestReorgExprContext is used in refactor stage to make sure the newDefaultReorgDistSQLCtx() is -// compatible with newMockReorgSessCtx(nil).GetDistSQLCtx() to make it safe to replace `mock.Context` usage. -// After refactor, the TestReorgExprContext can be removed. 
-func TestReorgDistSQLCtx(t *testing.T) { - store := &mockStorage{client: &mock.Client{}} - - // test default dist sql context - expected := newMockReorgSessCtx(store).GetDistSQLCtx() - defaultCtx := newDefaultReorgDistSQLCtx(store.client, expected.WarnHandler) - assertDistSQLCtxEqual(t, expected, defaultCtx) - - // test dist sql context from DDLReorgMeta - for _, reorg := range []model.DDLReorgMeta{ - { - SQLMode: mysql.ModeStrictTransTables | mysql.ModeAllowInvalidDates, - Location: &model.TimeZoneLocation{Name: "Asia/Tokyo"}, - ReorgTp: model.ReorgTypeIngest, - ResourceGroupName: "rg1", - }, - { - SQLMode: mysql.ModeAllowInvalidDates, - // should load location from system value when reorg.Location is nil - Location: nil, - ReorgTp: model.ReorgTypeTxnMerge, - ResourceGroupName: "rg2", - }, - } { - sctx := newMockReorgSessCtx(store) - require.NoError(t, initSessCtx(sctx, &reorg)) - expected = sctx.GetDistSQLCtx() - ctx, err := newReorgDistSQLCtxWithReorgMeta(store.client, &reorg, expected.WarnHandler) - require.NoError(t, err) - assertDistSQLCtxEqual(t, expected, ctx) - // Location should match DDLReorgMeta - if reorg.Location != nil { - require.Equal(t, reorg.Location.Name, ctx.Location.String()) - } else { - loc := timeutil.SystemLocation() - require.Same(t, loc, ctx.Location) - } - // ResourceGroupName should match DDLReorgMeta - require.Equal(t, reorg.ResourceGroupName, ctx.ResourceGroupName) - // Some fields should be different from the default context to make the test robust. 
- require.NotEqual(t, defaultCtx.ErrCtx.LevelMap(), ctx.ErrCtx.LevelMap()) - } -} - func TestValidateAndFillRanges(t *testing.T) { mkRange := func(start, end string) kv.KeyRange { return kv.KeyRange{StartKey: []byte(start), EndKey: []byte(end)} diff --git a/pkg/ddl/backfilling_txn_executor.go b/pkg/ddl/backfilling_txn_executor.go index a7d9552a72c05..f366a11a9c642 100644 --- a/pkg/ddl/backfilling_txn_executor.go +++ b/pkg/ddl/backfilling_txn_executor.go @@ -122,17 +122,12 @@ func (b *txnBackfillExecutor) resultChan() <-chan *backfillResult { // NewReorgCopContext creates a CopContext for reorg func NewReorgCopContext( - store kv.Storage, reorgMeta *model.DDLReorgMeta, tblInfo *model.TableInfo, allIdxInfo []*model.IndexInfo, requestSource string, ) (copr.CopContext, error) { warnHandler := contextutil.NewStaticWarnHandler(0) - distSQLCtx, err := newReorgDistSQLCtxWithReorgMeta(store.GetClient(), reorgMeta, warnHandler) - if err != nil { - return nil, err - } exprCtx, err := newReorgExprCtxWithReorgMeta(reorgMeta, warnHandler) if err != nil { @@ -145,7 +140,6 @@ func NewReorgCopContext( return copr.NewCopContext( exprCtx, - distSQLCtx, pushDownFlags, tblInfo, allIdxInfo, @@ -179,6 +173,7 @@ func newDefaultReorgDistSQLCtx(kvClient kv.Client, warnHandler contextutil.WarnA TiFlashQuerySpillRatio: variable.DefTiFlashQuerySpillRatio, ResourceGroupName: resourcegroup.DefaultResourceGroupName, ExecDetails: &execDetails, + RuntimeStatsColl: execdetails.NewRuntimeStatsColl(nil), } } @@ -289,7 +284,10 @@ func (b *txnBackfillExecutor) adjustWorkerSize() error { if err != nil { return err } - tmpIdxWorker := newMergeTempIndexWorker(backfillCtx, b.tbl, reorgInfo.elements) + tmpIdxWorker, err := newMergeTempIndexWorker(backfillCtx, b.tbl, reorgInfo.elements) + if err != nil { + return err + } runner = newBackfillWorker(b.ctx, tmpIdxWorker) worker = tmpIdxWorker case typeUpdateColumnWorker: diff --git a/pkg/ddl/bench_test.go b/pkg/ddl/bench_test.go index 
bfd2f47a1a5b1..0163da488f339 100644 --- a/pkg/ddl/bench_test.go +++ b/pkg/ddl/bench_test.go @@ -46,7 +46,7 @@ func BenchmarkExtractDatumByOffsets(b *testing.B) { tblInfo := tbl.Meta() idxInfo := tblInfo.FindIndexByName("idx") sctx := tk.Session() - copCtx, err := ddl.NewReorgCopContext(store, ddl.NewDDLReorgMeta(sctx), tblInfo, []*model.IndexInfo{idxInfo}, "") + copCtx, err := ddl.NewReorgCopContext(ddl.NewDDLReorgMeta(sctx), tblInfo, []*model.IndexInfo{idxInfo}, "") require.NoError(b, err) require.IsType(b, copCtx, &copr.CopContextSingleIndex{}) require.NoError(b, err) @@ -86,7 +86,8 @@ func BenchmarkGenerateIndexKV(b *testing.B) { tblInfo := tbl.Meta() idxInfo := tblInfo.FindIndexByName("idx") - index := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + index, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(b, err) sctx := tk.Session().GetSessionVars().StmtCtx idxDt := []types.Datum{types.NewIntDatum(10)} buf := make([]byte, 0, 64) diff --git a/pkg/ddl/column.go b/pkg/ddl/column.go index f033de19f2cb3..ca000ebf6da43 100644 --- a/pkg/ddl/column.go +++ b/pkg/ddl/column.go @@ -288,6 +288,10 @@ func isDroppableColumn(tblInfo *model.TableInfo, colName pmodel.CIStr) error { if err != nil { return err } + err = checkColumnReferencedByPartialCondition(tblInfo, colName) + if err != nil { + return err + } return nil } diff --git a/pkg/ddl/copr/BUILD.bazel b/pkg/ddl/copr/BUILD.bazel index 71f40bcb9ded6..59fcb21dba08e 100644 --- a/pkg/ddl/copr/BUILD.bazel +++ b/pkg/ddl/copr/BUILD.bazel @@ -6,7 +6,6 @@ go_library( importpath = "github.com/pingcap/tidb/pkg/ddl/copr", visibility = ["//visibility:public"], deps = [ - "//pkg/distsql/context", "//pkg/expression", "//pkg/expression/exprctx", "//pkg/infoschema", diff --git a/pkg/ddl/copr/copr_ctx.go b/pkg/ddl/copr/copr_ctx.go index e4736ffa78c41..16fd060dd7141 100644 --- a/pkg/ddl/copr/copr_ctx.go +++ b/pkg/ddl/copr/copr_ctx.go @@ -16,7 +16,6 @@ package copr import ( "github.com/pingcap/errors" - distsqlctx 
"github.com/pingcap/tidb/pkg/distsql/context" "github.com/pingcap/tidb/pkg/expression" "github.com/pingcap/tidb/pkg/expression/exprctx" // make sure mock.MockInfoschema is initialized to make sure the test pass @@ -33,6 +32,11 @@ type CopContext interface { GetBase() *CopContextBase IndexColumnOutputOffsets(idxID int64) []int IndexInfo(idxID int64) *model.IndexInfo + // GetCondition returns the condition of the index as an expression. + // If it's `nil`, it means we'll need to scan all the data to build the index. + // The condition represents the condition to push down in cop request, so it can be + // a single expression or a DNF expression. + GetCondition() (expression.Expression, error) } // CopContextBase contains common fields for CopContextSingleIndex and CopContextMultiIndex. @@ -40,7 +44,6 @@ type CopContextBase struct { TableInfo *model.TableInfo PrimaryKeyInfo *model.IndexInfo ExprCtx exprctx.BuildContext - DistSQLCtx *distsqlctx.DistSQLContext PushDownFlags uint64 RequestSource string @@ -70,9 +73,9 @@ type CopContextMultiIndex struct { } // NewCopContextBase creates a CopContextBase. +// `idxCols` contains all the index columns and also the columns referenced by the index condition. func NewCopContextBase( exprCtx exprctx.BuildContext, - distSQLCtx *distsqlctx.DistSQLContext, pushDownFlags uint64, tblInfo *model.TableInfo, idxCols []*model.IndexColumn, @@ -134,7 +137,6 @@ func NewCopContextBase( TableInfo: tblInfo, PrimaryKeyInfo: primaryIdx, ExprCtx: exprCtx, - DistSQLCtx: distSQLCtx, PushDownFlags: pushDownFlags, RequestSource: requestSource, ColumnInfos: colInfos, @@ -149,28 +151,34 @@ func NewCopContextBase( // NewCopContext creates a CopContext. 
func NewCopContext( exprCtx exprctx.BuildContext, - distSQLCtx *distsqlctx.DistSQLContext, pushDownFlags uint64, tblInfo *model.TableInfo, allIdxInfo []*model.IndexInfo, requestSource string, ) (CopContext, error) { if len(allIdxInfo) == 1 { - return NewCopContextSingleIndex(exprCtx, distSQLCtx, pushDownFlags, tblInfo, allIdxInfo[0], requestSource) + return NewCopContextSingleIndex(exprCtx, pushDownFlags, tblInfo, allIdxInfo[0], requestSource) } - return NewCopContextMultiIndex(exprCtx, distSQLCtx, pushDownFlags, tblInfo, allIdxInfo, requestSource) + return NewCopContextMultiIndex(exprCtx, pushDownFlags, tblInfo, allIdxInfo, requestSource) } // NewCopContextSingleIndex creates a CopContextSingleIndex. func NewCopContextSingleIndex( exprCtx exprctx.BuildContext, - distSQLCtx *distsqlctx.DistSQLContext, pushDownFlags uint64, tblInfo *model.TableInfo, idxInfo *model.IndexInfo, requestSource string, ) (*CopContextSingleIndex, error) { - base, err := NewCopContextBase(exprCtx, distSQLCtx, pushDownFlags, tblInfo, idxInfo.Columns, requestSource) + cols := idxInfo.Columns + neededCols, err := tables.ExtractColumnsFromCondition(exprCtx, idxInfo, tblInfo, false) + if err != nil { + return nil, err + } + cols = append(cols, neededCols...) + cols = tables.DedupIndexColumns(cols) + + base, err := NewCopContextBase(exprCtx, pushDownFlags, tblInfo, cols, requestSource) if err != nil { return nil, err } @@ -197,10 +205,32 @@ func (c *CopContextSingleIndex) IndexInfo(_ int64) *model.IndexInfo { return c.idxInfo } +// GetCondition implements the CopContext interface. 
+func (c *CopContextSingleIndex) GetCondition() (expression.Expression, error) { + if !c.idxInfo.HasCondition() { + return nil, nil + } + + schema, names := c.GetBase().GetSchemaAndNames() + + expr, err := expression.ParseSimpleExpr(c.GetBase().ExprCtx, + c.idxInfo.ConditionExprString, + expression.WithInputSchemaAndNames(schema, names, c.GetBase().TableInfo)) + if err != nil { + return nil, err + } + for _, col := range expression.ExtractColumns(expr) { + if col.VirtualExpr != nil { + // Virtual generated columns cannot be pushed down. + return nil, nil + } + } + return expr, nil +} + // NewCopContextMultiIndex creates a CopContextMultiIndex. func NewCopContextMultiIndex( exprCtx exprctx.BuildContext, - distSQLCtx *distsqlctx.DistSQLContext, pushDownFlags uint64, tblInfo *model.TableInfo, allIdxInfo []*model.IndexInfo, @@ -210,18 +240,19 @@ func NewCopContextMultiIndex( for _, idxInfo := range allIdxInfo { approxColLen += len(idxInfo.Columns) } - distinctOffsets := make(map[int]struct{}, approxColLen) allIdxCols := make([]*model.IndexColumn, 0, approxColLen) for _, idxInfo := range allIdxInfo { - for _, idxCol := range idxInfo.Columns { - if _, found := distinctOffsets[idxCol.Offset]; !found { - distinctOffsets[idxCol.Offset] = struct{}{} - allIdxCols = append(allIdxCols, idxCol) - } + allIdxCols = append(allIdxCols, idxInfo.Columns...) + + neededCols, err := tables.ExtractColumnsFromCondition(exprCtx, idxInfo, tblInfo, false) + if err != nil { + return nil, err } + allIdxCols = append(allIdxCols, neededCols...) } + allIdxCols = tables.DedupIndexColumns(allIdxCols) - base, err := NewCopContextBase(exprCtx, distSQLCtx, pushDownFlags, tblInfo, allIdxCols, requestSource) + base, err := NewCopContextBase(exprCtx, pushDownFlags, tblInfo, allIdxCols, requestSource) if err != nil { return nil, err } @@ -262,6 +293,37 @@ func (c *CopContextMultiIndex) IndexInfo(indexID int64) *model.IndexInfo { return nil } +// GetCondition implements the CopContext interface. 
+func (c *CopContextMultiIndex) GetCondition() (expression.Expression, error) { + exprs := make([]expression.Expression, 0, len(c.allIndexInfos)) + for _, idxInfo := range c.allIndexInfos { + if !idxInfo.HasCondition() { + return nil, nil + } + + schema, names := c.GetBase().GetSchemaAndNames() + expr, err := expression.ParseSimpleExpr(c.GetBase().ExprCtx, + idxInfo.ConditionExprString, + expression.WithInputSchemaAndNames(schema, names, c.GetBase().TableInfo)) + if err != nil { + return nil, err + } + for _, col := range expression.ExtractColumns(expr) { + if col.VirtualExpr != nil { + // Virtual generated columns cannot be pushed down. + return nil, nil + } + } + exprs = append(exprs, expr) + } + + // Use `OR` to combine all the conditions. + if len(exprs) > 0 { + return expression.ComposeDNFCondition(c.GetBase().ExprCtx, exprs...), nil + } + return nil, nil +} + func fillUsedColumns( usedCols map[int64]struct{}, idxCols []*model.IndexColumn, @@ -331,3 +393,31 @@ func collectVirtualColumnOffsetsAndTypes(ctx expression.EvalContext, cols []*exp } return offsets, fts } + +// GetSchemaAndNames returns the schema and nameslice returned from the internal cop request. +func (c *CopContextBase) GetSchemaAndNames() (*expression.Schema, types.NameSlice) { + exprColumns := make([]*expression.Column, 0, len(c.ExprColumnInfos)) + names := types.NameSlice{} + for i, col := range c.ExprColumnInfos { + newCol := col.Clone().(*expression.Column) + newCol.Index = i + exprColumns = append(exprColumns, newCol) + + // Specially handle the extra handle column. + // We cannot get the name of extra handle column from tableInfo. + var colName ast.CIStr + if col.ID == model.ExtraHandleID { + colName = model.ExtraHandleName + } else { + colName = c.TableInfo.Columns[col.Index].Name + } + + names = append(names, &types.FieldName{ + TblName: c.TableInfo.Name, + ColName: colName, + }) + } + schema := expression.NewSchema(exprColumns...) 
+ + return schema, names +} diff --git a/pkg/ddl/copr/copr_ctx_test.go b/pkg/ddl/copr/copr_ctx_test.go index 34cde816a0738..50de58932c32a 100644 --- a/pkg/ddl/copr/copr_ctx_test.go +++ b/pkg/ddl/copr/copr_ctx_test.go @@ -109,7 +109,6 @@ func TestNewCopContextSingleIndex(t *testing.T) { sctx := mock.NewContext() copCtx, err := NewCopContextSingleIndex( sctx.GetExprCtx(), - sctx.GetDistSQLCtx(), sctx.GetSessionVars().StmtCtx.PushDownFlags(), mockTableInfo, mockIdxInfo, "", ) diff --git a/pkg/ddl/executor.go b/pkg/ddl/executor.go index 97d9d7cba28f7..200c45e6be1ef 100644 --- a/pkg/ddl/executor.go +++ b/pkg/ddl/executor.go @@ -3222,6 +3222,11 @@ func checkIsDroppableColumn(ctx sessionctx.Context, is infoschema.InfoSchema, sc if mysql.HasAutoIncrementFlag(col.GetFlag()) && !ctx.GetSessionVars().AllowRemoveAutoInc { return false, dbterror.ErrCantDropColWithAutoInc } + // Check the partial index condition + err = checkColumnReferencedByPartialCondition(t.Meta(), col.ColumnInfo.Name) + if err != nil { + return false, errors.Trace(err) + } return true, nil } @@ -4220,13 +4225,7 @@ func (e *executor) dropTableObject( tempTableType := tableInfo.Meta().TempTableType if config.CheckTableBeforeDrop && tempTableType == model.TempTableNone { - logutil.DDLLogger().Warn("admin check table before drop", - zap.String("database", fullti.Schema.O), - zap.String("table", fullti.Name.O), - ) - exec := ctx.GetRestrictedSQLExecutor() - internalCtx := kv.WithInternalSourceType(context.Background(), kv.InternalTxnDDL) - _, _, err := exec.ExecRestrictedSQL(internalCtx, nil, "admin check table %n.%n", fullti.Schema.O, fullti.Name.O) + err := adminCheckTableBeforeDrop(ctx, fullti) if err != nil { return err } @@ -4298,6 +4297,40 @@ func (e *executor) dropTableObject( return nil } +// adminCheckTableBeforeDrop runs `admin check table` for the table to be dropped. 
+// Actually this function doesn't do anything specific for `DROP TABLE`, but to avoid +// using it in other places by mistake, it's named like this. +func adminCheckTableBeforeDrop(ctx sessionctx.Context, fullti ast.Ident) error { + logutil.DDLLogger().Warn("admin check table before drop", + zap.String("database", fullti.Schema.O), + zap.String("table", fullti.Name.O), + ) + exec := ctx.GetRestrictedSQLExecutor() + internalCtx := kv.WithInternalSourceType(context.Background(), kv.InternalTxnDDL) + + // `ON` is already the default value of `tidb_enable_fast_table_check`, and some features (e.g. partial index) + // don't support admin check with `tidb_enable_fast_table_check = OFF`, so we just set it to `ON` here. + // TODO: set the value of `tidb_enable_fast_table_check` to 'ON' for all internal sessions if it's OK. + originalFastTableCheck := ctx.GetSessionVars().FastCheckTable + _, _, err := exec.ExecRestrictedSQL(internalCtx, nil, "set tidb_enable_fast_table_check = 'ON';") + if err != nil { + return err + } + if !originalFastTableCheck { + defer func() { + _, _, err = exec.ExecRestrictedSQL(internalCtx, nil, "set tidb_enable_fast_table_check = 'OFF';") + if err != nil { + logutil.DDLLogger().Warn("set tidb_enable_fast_table_check = 'OFF' failed", zap.Error(err)) + } + }() + } + _, _, err = exec.ExecRestrictedSQL(internalCtx, nil, "admin check table %n.%n", fullti.Schema.O, fullti.Name.O) + if err != nil { + return err + } + return nil +} + // DropTable will proceed even if some table in the list does not exists. 
func (e *executor) DropTable(ctx sessionctx.Context, stmt *ast.DropTableStmt) (err error) { return e.dropTableObject(ctx, stmt.Tables, stmt.IfExists, tableObject) @@ -4975,6 +5008,19 @@ func (e *executor) createIndex(ctx sessionctx.Context, ti ast.Ident, keyType ast return errors.Trace(err) } +<<<<<<< HEAD +======= + var conditionString string + if indexOption != nil { + conditionString, err = CheckAndBuildIndexConditionString(tblInfo, indexOption.Condition) + if err != nil { + return errors.Trace(err) + } + if len(conditionString) > 0 && !job.ReorgMeta.IsFastReorg { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs("add partial index without fast reorg is not supported") + } + } +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) args := &model.ModifyIndexArgs{ IndexArgs: []*model.IndexArg{{ Unique: unique, @@ -7129,3 +7175,42 @@ func (e *executor) RefreshMeta(sctx sessionctx.Context, args *model.RefreshMetaA err := e.doDDLJob2(sctx, job, args) return errors.Trace(err) } +<<<<<<< HEAD +======= + +func getScatterScopeFromSessionctx(sctx sessionctx.Context) string { + if val, ok := sctx.GetSessionVars().GetSystemVar(vardef.TiDBScatterRegion); ok { + return val + } + logutil.DDLLogger().Info("system variable tidb_scatter_region not found, use default value") + return vardef.DefTiDBScatterRegion +} + +func getEnableDDLAnalyze(sctx sessionctx.Context) string { + if val, ok := sctx.GetSessionVars().GetSystemVar(vardef.TiDBEnableDDLAnalyze); ok { + return val + } + logutil.DDLLogger().Info("system variable tidb_stats_update_during_ddl not found, use default value") + return variable.BoolToOnOff(vardef.DefTiDBEnableDDLAnalyze) +} + +func getAnalyzeVersion(sctx sessionctx.Context) string { + if val, ok := sctx.GetSessionVars().GetSystemVar(vardef.TiDBAnalyzeVersion); ok { + return val + } + logutil.DDLLogger().Info("system variable tidb_analyze_version not found, use default value") + return 
strconv.Itoa(vardef.DefTiDBAnalyzeVersion) +} + +// checkColumnReferencedByPartialCondition checks whether alter column is referenced by a partial index condition +func checkColumnReferencedByPartialCondition(t *model.TableInfo, colName ast.CIStr) error { + for _, idx := range t.Indices { + _, ic := model.FindIndexColumnByName(idx.AffectColumn, colName.L) + if ic != nil { + return dbterror.ErrModifyColumnReferencedByPartialCondition.GenWithStackByArgs(colName.O, idx.Name.O) + } + } + + return nil +} +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) diff --git a/pkg/ddl/export_test.go b/pkg/ddl/export_test.go index 4e0c1edb537ad..908870962e782 100644 --- a/pkg/ddl/export_test.go +++ b/pkg/ddl/export_test.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/pkg/errctx" "github.com/pingcap/tidb/pkg/expression" "github.com/pingcap/tidb/pkg/kv" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/table" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/chunk" @@ -51,7 +52,11 @@ func FetchChunk4Test(copCtx copr.CopContext, tbl table.PhysicalTable, startKey, wctx := ddl.NewLocalWorkerCtx(context.Background(), 1) defer wctx.Cancel() src := testutil.NewOperatorTestSource(ddl.TableScanTask{ID: 1, Start: startKey, End: endKey}) +<<<<<<< HEAD scanOp := ddl.NewTableScanOperator(wctx, sessPool, copCtx, srcChkPool, 1, 0, nil, nil) +======= + scanOp := ddl.NewTableScanOperator(opCtx, sessPool, copCtx, srcChkPool, 1, 0, &model.DDLReorgMeta{}, nil, &execute.TestCollector{}) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) sink := testutil.NewOperatorTestSink[ddl.IndexRecordChunk]() operator.Compose[ddl.TableScanTask](src, scanOp) diff --git a/pkg/ddl/index.go b/pkg/ddl/index.go index 593453013ab1c..d60445c04b95d 100644 --- a/pkg/ddl/index.go +++ b/pkg/ddl/index.go @@ -46,6 +46,8 @@ import ( 
"github.com/pingcap/tidb/pkg/disttask/framework/storage" "github.com/pingcap/tidb/pkg/domain/infosync" "github.com/pingcap/tidb/pkg/errctx" + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/expression/exprstatic" "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/lightning/backend" @@ -417,7 +419,20 @@ func BuildIndexInfo( idxInfo.Tp = indexOption.Tp } idxInfo.Global = indexOption.Global +<<<<<<< HEAD setGlobalIndexVersion(tblInfo, idxInfo) +======= + + conditionString, err := CheckAndBuildIndexConditionString(tblInfo, indexOption.Condition) + if err != nil { + return nil, errors.Trace(err) + } + idxInfo.ConditionExprString = conditionString + idxInfo.AffectColumn, err = buildAffectColumn(idxInfo, tblInfo) + if err != nil { + return nil, errors.Trace(err) + } +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) } else { // Use btree as default index type. idxInfo.Tp = pmodel.IndexTypeBtree @@ -1018,6 +1033,19 @@ func (w *worker) onCreateIndex(jobCtx *jobContext, job *model.Job, isPK bool) (v job.State = model.JobStateCancelled return ver, errors.Trace(err) } +<<<<<<< HEAD +======= + // The condition in the index option is not marshaled, so we need to set it here. + if len(arg.ConditionString) > 0 { + indexInfo.ConditionExprString = arg.ConditionString + // As we've updated the `ConditionExprString`, we need to rebuild the AffectColumn. + indexInfo.AffectColumn, err = buildAffectColumn(indexInfo, tblInfo) + if err != nil { + job.State = model.JobStateCancelled + return ver, errors.Trace(err) + } + } +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) allIndexInfos = append(allIndexInfos, indexInfo) } @@ -1172,6 +1200,12 @@ func initForReorgIndexes(w *worker, job *model.Job, idxInfos []*model.IndexInfo) if err != nil { return err } + // Partial Index is not supported without fast reorg. 
+ for _, indexInfo := range idxInfos { + if (reorgTp == model.ReorgTypeTxn || reorgTp == model.ReorgTypeTxnMerge) && indexInfo.HasCondition() { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs("add partial index without fast reorg is not supported") + } + } loadCloudStorageURI(w, job) if reorgTp.NeedMergeProcess() { // Increase telemetryAddIndexIngestUsage @@ -2190,7 +2224,10 @@ func newAddIndexTxnWorker( continue } indexInfo := model.FindIndexInfoByID(t.Meta().Indices, elem.ID) - index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) + index, err := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) + if err != nil { + return nil, err + } allIndexes = append(allIndexes, index) } rowDecoder := decoder.NewRowDecoder(t, t.WritableCols(), decodeColMap) @@ -2319,7 +2356,9 @@ func (w *baseIndexWorker) fetchRowColVals(txn kv.Transaction, taskRange reorgBac if err != nil { return false, err } + for _, index := range w.indexes { +<<<<<<< HEAD actualHandle := handle // For global indexes V1+ on partitioned tables, we need to wrap the handle // with the partition ID to create a PartitionHandle. 
@@ -2330,6 +2369,12 @@ func (w *baseIndexWorker) fetchRowColVals(txn kv.Transaction, taskRange reorgBac actualHandle = kv.NewPartitionHandle(taskRange.physicalTable.GetPhysicalID(), handle) } idxRecord, err1 := w.getIndexRecord(index.Meta(), actualHandle, recordKey) +======= + if index.Meta().HasCondition() { + return false, dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs("add partial index without fast reorg") + } + idxRecord, err1 := w.getIndexRecord(index.Meta(), handle, recordKey) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) if err1 != nil { return false, errors.Trace(err1) } @@ -2479,6 +2524,7 @@ func writeChunk( ctx context.Context, writers []ingest.Writer, indexes []table.Index, + indexConditionCheckers []func(row chunk.Row) (bool, error), copCtx copr.CopContext, loc *time.Location, errCtx errctx.Context, @@ -2520,6 +2566,7 @@ func writeChunk( if restore { restoreDataBuf = make([]types.Datum, len(c.HandleOutputOffsets)) } + for row := iter.Begin(); row != iter.End(); row = iter.Next() { handleDataBuf := ExtractDatumByOffsets(ectx, row, c.HandleOutputOffsets, c.ExprColumnInfos, handleDataBuf) if restore { @@ -2533,6 +2580,18 @@ func writeChunk( return 0, nil, errors.Trace(err) } for i, index := range indexes { + // If the `IndexRecordChunk.conditionPushed` is true and we have only 1 index, the `indexConditionCheckers` + // will not be initialized. 
+ if index.Meta().HasCondition() && indexConditionCheckers != nil { + ok, err := indexConditionCheckers[i](row) + if err != nil { + return 0, 0, errors.Trace(err) + } + if !ok { + continue + } + } + idxID := index.Meta().ID idxDataBuf = ExtractDatumByOffsets(ectx, row, copCtx.IndexColumnOutputOffsets(idxID), c.ExprColumnInfos, idxDataBuf) @@ -3699,3 +3758,239 @@ func renameHiddenColumns(tblInfo *model.TableInfo, from, to pmodel.CIStr) { } } } +<<<<<<< HEAD +======= + +// CheckAndBuildIndexConditionString validates whether the given expression is compatible with +// the table schema and returns a string representation of the expression. +func CheckAndBuildIndexConditionString(tblInfo *model.TableInfo, indexConditionExpr ast.ExprNode) (string, error) { + if indexConditionExpr == nil { + return "", nil + } + + // Be careful, in `CREATE TABLE` statement, the `tblInfo.Partition` is always nil here. We have to + // check it in `buildTablePartitionInfo` again. + if tblInfo.Partition != nil { + return "", dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + "partial index on partitioned table is not supported") + } + + // check partial index condition expression + err := checkIndexCondition(tblInfo, indexConditionExpr) + if err != nil { + return "", errors.Trace(err) + } + + var sb strings.Builder + restoreFlags := format.RestoreStringSingleQuotes | format.RestoreKeyWordLowercase | format.RestoreNameBackQuotes | + format.RestoreSpacesAroundBinaryOperation | format.RestoreWithoutSchemaName | format.RestoreWithoutTableName + restoreCtx := format.NewRestoreCtx(restoreFlags, &sb) + sb.Reset() + err = indexConditionExpr.Restore(restoreCtx) + if err != nil { + return "", errors.Trace(err) + } + + return sb.String(), nil +} + +func checkIndexCondition(tblInfo *model.TableInfo, indexCondition ast.ExprNode) error { + // Only the following expressions are supported: + // 1. column IS NULL + // 2. column IS NOT NULL + // 3. 
column = / != / > / < / >= / <= const + // The column must be a visible column in the table, and the const must be a literal value with + // the same type as the column. + // The column must **NOT** be a generated column. We can loosen this restriction in the future. + // + // TODO: support more expressions in the future. + if indexCondition == nil { + return nil + } + + switch cond := indexCondition.(type) { + case *ast.IsNullExpr: + // `IS NULL` and `IS NOT NULL` are both in this branch. + columnName, ok := cond.Expr.(*ast.ColumnNameExpr) + if !ok { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + "partial index condition must include a column name in the IS NULL expression") + } + columnInfo := model.FindColumnInfo(tblInfo.Columns, columnName.Name.Name.L) + if columnInfo == nil { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("column name %s referenced in partial index condition is not found in table", + columnName.Name.Name.L)) + } + if columnInfo.IsGenerated() { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("generated column %s cannot be used in partial index condition", columnName.Name.Name.L)) + } + + return nil + case *ast.BinaryOperationExpr: + if cond.Op != opcode.EQ && cond.Op != opcode.NE && cond.Op != opcode.GT && + cond.Op != opcode.LT && cond.Op != opcode.GE && cond.Op != opcode.LE { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("binary operation %s is not supported", cond.Op.String())) + } + + var columnName *ast.ColumnNameExpr + var anotherSide ast.ExprNode + columnName, ok := cond.L.(*ast.ColumnNameExpr) + if !ok { + // maybe the right side is a column name + columnName, ok = cond.R.(*ast.ColumnNameExpr) + if !ok { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + "partial index condition must include a column name in the binary operation") + } + + anotherSide = cond.L + } else { + anotherSide = cond.R 
+ } + columnInfo := model.FindColumnInfo(tblInfo.Columns, columnName.Name.Name.L) + if columnInfo == nil { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("column name `%s` referenced in partial index condition is not found in table", + columnName.Name.Name.L)) + } + if columnInfo.IsGenerated() { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("generated column %s cannot be used in partial index condition", columnName.Name.Name.L)) + } + + // The another side must be a literal value, and it must have the same type as the column. + constantExpr, ok := anotherSide.(ast.ValueExpr) + if !ok { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + "partial index condition must include a literal value on the other side of the binary operation") + } + // Reference `types.DefaultTypeForValue`, they are all possible types for literal values. + // However, this switch-case still includes more types than the ones we have in that function + // to avoid breaking in the future. + // + // Accept tiny type conversion as the type of the literal value is too limited. We shouldn't + // force the user to use such a limited range of types. + // + // It'll allow precision / length difference in most of the cases. 
+ switch constantExpr.GetType().GetType() { + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, + mysql.TypeInt24, mysql.TypeBit, mysql.TypeYear: + // the target column must be an integer type or enum or set + if columnInfo.GetType() != mysql.TypeTiny && + columnInfo.GetType() != mysql.TypeShort && + columnInfo.GetType() != mysql.TypeLong && + columnInfo.GetType() != mysql.TypeLonglong && + columnInfo.GetType() != mysql.TypeInt24 && + columnInfo.GetType() != mysql.TypeBit && + columnInfo.GetType() != mysql.TypeYear && + columnInfo.GetType() != mysql.TypeEnum && + columnInfo.GetType() != mysql.TypeSet { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("the type %s of the column `%s` in partial index condition is not compatible with the literal value type %s", + columnInfo.FieldType.String(), columnName.Name.Name.L, constantExpr.GetType().String())) + } + return nil + case mysql.TypeFloat, mysql.TypeDouble, mysql.TypeNewDecimal: + // the target column must be either a float or double type + // TODO: consider whether need to support decimal type in this branch + if columnInfo.GetType() != mysql.TypeFloat && + columnInfo.GetType() != mysql.TypeDouble && + columnInfo.GetType() != mysql.TypeNewDecimal { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("the type %s of the column `%s` in partial index condition is not compatible with the literal value type %s", + columnInfo.FieldType.String(), columnName.Name.Name.L, constantExpr.GetType().String())) + } + return nil + case mysql.TypeVarchar, mysql.TypeVarString, mysql.TypeString, + mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: + if types.IsString(columnInfo.GetType()) { + // check the collation of the column and the literal value + if columnInfo.FieldType.GetCharset() != constantExpr.GetType().GetCharset() { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("the charset 
%s of the column `%s` in partial index condition is not compatible with the literal value charset %s", + columnInfo.FieldType.GetCharset(), columnName.Name.Name.L, constantExpr.GetType().GetCharset())) + } + + return nil + } + + // Allow to compare a datetime type column with a string literal, because we don't have a datetime literal. + // This branch will allow users to use datetime columns in index condition. + if columnInfo.GetType() == mysql.TypeTimestamp || + columnInfo.GetType() == mysql.TypeDate || + columnInfo.GetType() == mysql.TypeDuration || + columnInfo.GetType() == mysql.TypeNewDate || + columnInfo.GetType() == mysql.TypeDatetime { + return nil + } + + // ENUM and SET are also allowed for string literal. + if columnInfo.GetType() == mysql.TypeEnum || columnInfo.GetType() == mysql.TypeSet { + return nil + } + + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("the type %s of the column `%s` in partial index condition is not compatible with the literal value type %s", + columnInfo.FieldType.String(), columnName.Name.Name.L, constantExpr.GetType().String())) + case mysql.TypeNull: + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + "= NULL is not supported in partial index condition because it is always false") + case mysql.TypeTimestamp, mysql.TypeDate, mysql.TypeDuration, mysql.TypeNewDate, + mysql.TypeDatetime, mysql.TypeJSON, mysql.TypeEnum, mysql.TypeSet: + // The `DATE '2025-07-28'` is actually a `cast` function, so they are also not supported yet. 
+ intest.Assert(false, "should never generate literal values of these types") + + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("the type %s of the literal value in partial index condition is not supported", + constantExpr.GetType().String())) + default: + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + fmt.Sprintf("the type %s of the literal value in partial index condition is not supported", + constantExpr.GetType().String())) + } + default: + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs( + "the kind of partial index condition is not supported") + } +} + +func buildAffectColumn(idxInfo *model.IndexInfo, tblInfo *model.TableInfo) ([]*model.IndexColumn, error) { + ectx := exprstatic.NewExprContext() + + // Build affect column for partial index. + if idxInfo.HasCondition() { + cols, err := tables.ExtractColumnsFromCondition(ectx, idxInfo, tblInfo, true) + if err != nil { + return nil, err + } + return tables.DedupIndexColumns(cols), nil + } + + return nil, nil +} + +// buildIndexConditionChecker builds an expression for evaluating the index condition based on +// the given columns. +func buildIndexConditionChecker(copCtx copr.CopContext, tblInfo *model.TableInfo, idxInfo *model.IndexInfo) (func(row chunk.Row) (bool, error), error) { + schema, names := copCtx.GetBase().GetSchemaAndNames() + + exprCtx := copCtx.GetBase().ExprCtx + expr, err := expression.ParseSimpleExpr(exprCtx, idxInfo.ConditionExprString, expression.WithInputSchemaAndNames(schema, names, tblInfo)) + if err != nil { + return nil, err + } + + return func(row chunk.Row) (bool, error) { + datum, isNull, err := expr.EvalInt(exprCtx.GetEvalCtx(), row) + if err != nil { + return false, err + } + // If the result is NULL, it usually means the original column itself is NULL. + // In this case, we should refuse to consider the index for partial index condition. 
+ return datum > 0 && !isNull, nil + }, nil +} +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) diff --git a/pkg/ddl/index_cop.go b/pkg/ddl/index_cop.go index 9960b8d008be6..baafbf4f5149d 100644 --- a/pkg/ddl/index_cop.go +++ b/pkg/ddl/index_cop.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tidb/pkg/expression/exprctx" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/sessionctx/vardef" "github.com/pingcap/tidb/pkg/table" "github.com/pingcap/tidb/pkg/table/tables" "github.com/pingcap/tidb/pkg/tablecodec" @@ -36,11 +37,15 @@ import ( "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/collate" + "github.com/pingcap/tidb/pkg/util/logutil" "github.com/pingcap/tidb/pkg/util/timeutil" "github.com/pingcap/tipb/go-tipb" kvutil "github.com/tikv/client-go/v2/util" + "go.uber.org/zap" ) +const tableScanCopID = 1 + func wrapInBeginRollback(se *sess.Session, f func(startTS uint64) error) error { err := se.Begin(context.Background()) if err != nil { @@ -59,28 +64,60 @@ func wrapInBeginRollback(se *sess.Session, f func(startTS uint64) error) error { return err } -func buildTableScan(ctx context.Context, c *copr.CopContextBase, startTS uint64, start, end kv.Key) (distsql.SelectResult, error) { - dagPB, err := buildDAGPB(c.ExprCtx, c.DistSQLCtx, c.PushDownFlags, c.TableInfo, c.ColumnInfos) +func buildTableScan(ctx context.Context, c *copr.CopContextBase, distSQLCtx *distsqlctx.DistSQLContext, startTS uint64, start, end kv.Key, selectExpr expression.Expression) (distsql.SelectResult, bool, error) { + dagPB, conditionPushed, err := buildDAGPB(ctx, c.ExprCtx, distSQLCtx, c.PushDownFlags, c.TableInfo, c.ColumnInfos, selectExpr) if err != nil { - return nil, err + return nil, false, err } var builder distsql.RequestBuilder - kvReq, err := builder. + builder. SetDAGRequest(dagPB). SetStartTS(startTS). 
SetKeyRanges([]kv.KeyRange{{StartKey: start, EndKey: end}}). SetKeepOrder(true). - SetFromSessionVars(c.DistSQLCtx). - SetConcurrency(1). + SetFromSessionVars(distSQLCtx). + SetConcurrency(1) + if selectExpr != nil { + // DDL will not push down to TiFlash currently, so we can just specify `kv.TiKV` here to make it clearer. + builder.SetStoreType(kv.TiKV) + } + kvReq, err := builder. Build() kvReq.RequestSource.RequestSourceInternal = true kvReq.RequestSource.RequestSourceType = getDDLRequestSource(model.ActionAddIndex) kvReq.RequestSource.ExplicitRequestSourceType = kvutil.ExplicitTypeDDL if err != nil { - return nil, err + return nil, conditionPushed, err + } + + if distSQLCtx.RuntimeStatsColl == nil { + result, err := distsql.Select(ctx, distSQLCtx, kvReq, c.FieldTypes) + return result, conditionPushed, err + } + // The plan ID of the table scan is always `tableScanCopID`, so we can read the stats of `tableScanCopID` executor to know + // how many rows have been scanned. + // + // The following logic assumes that the DAG has a structure like: + // TableScan -> Executor1 -> Executor2 -> ... -> ExecutorN + // So the plan IDs are assigned like: + // TableScan: tableScanCopID + // Executor1: tableScanCopID + 1 + // Executor2: tableScanCopID + 2 + // ... 
+ // ExecutorN: tableScanCopID + N + copPlanIDs := make([]int, 0, 2) + copPlanIDs = append(copPlanIDs, tableScanCopID) + rootPlanID := tableScanCopID + for i := range dagPB.Executors { + if i == 0 { + continue + } + copPlanIDs = append(copPlanIDs, tableScanCopID+i) + rootPlanID = tableScanCopID + i } - return distsql.Select(ctx, c.DistSQLCtx, kvReq, c.FieldTypes) + result, err := distsql.SelectWithRuntimeStats(ctx, distSQLCtx, kvReq, c.FieldTypes, copPlanIDs, rootPlanID) + return result, conditionPushed, err } func fetchTableScanResult( @@ -136,20 +173,45 @@ func getRestoreData(tblInfo *model.TableInfo, targetIdx, pkIdx *model.IndexInfo, return dtToRestored } -func buildDAGPB(exprCtx exprctx.BuildContext, distSQLCtx *distsqlctx.DistSQLContext, pushDownFlags uint64, tblInfo *model.TableInfo, colInfos []*model.ColumnInfo) (*tipb.DAGRequest, error) { +func buildDAGPB(ctx context.Context, exprCtx exprctx.BuildContext, distSQLCtx *distsqlctx.DistSQLContext, pushDownFlags uint64, tblInfo *model.TableInfo, colInfos []*model.ColumnInfo, selectExpr expression.Expression) (*tipb.DAGRequest, bool, error) { + conditionPushed := false + dagReq := &tipb.DAGRequest{} dagReq.TimeZoneName, dagReq.TimeZoneOffset = timeutil.Zone(exprCtx.GetEvalCtx().Location()) dagReq.Flags = pushDownFlags for i := range colInfos { dagReq.OutputOffsets = append(dagReq.OutputOffsets, uint32(i)) } - execPB, err := constructTableScanPB(exprCtx, tblInfo, colInfos) + tblScanPB, err := constructTableScanPB(exprCtx, tblInfo, colInfos) if err != nil { - return nil, err + return nil, false, err } - dagReq.Executors = append(dagReq.Executors, execPB) + + var selectionPB *tipb.Executor + if selectExpr != nil { + selectionPB, err = constructSelectionPB(exprCtx, selectExpr, distSQLCtx, tblScanPB) + } + + // Now, the partial index doesn't support pushing down part of the condition. + // So if we cannot push down the whole condition, we just ignore it. 
+ if err == nil && selectionPB != nil { + conditionPushed = true + dagReq.Executors = append(dagReq.Executors, tblScanPB, selectionPB) + } else { + if selectExpr != nil { + selectExprStr := selectExpr.StringWithCtx(exprCtx.GetEvalCtx(), errors.RedactLogDisable) + logutil.Logger(ctx).Info("fail to push down the selection expression for index condition", + zap.String("table", tblInfo.Name.O), + zap.String("expr", selectExprStr), + zap.Error(err)) + } + dagReq.Executors = append(dagReq.Executors, tblScanPB) + } + distsql.SetEncodeType(distSQLCtx, dagReq) - return dagReq, nil + collExec := true + dagReq.CollectExecutionSummaries = &collExec + return dagReq, conditionPushed, nil } func constructTableScanPB(ctx exprctx.BuildContext, tblInfo *model.TableInfo, colInfos []*model.ColumnInfo) (*tipb.Executor, error) { @@ -159,6 +221,32 @@ func constructTableScanPB(ctx exprctx.BuildContext, tblInfo *model.TableInfo, co return &tipb.Executor{Tp: tipb.ExecType_TypeTableScan, TblScan: tblScan}, err } +func constructSelectionPB(ctx exprctx.BuildContext, expr expression.Expression, distSQLCtx *distsqlctx.DistSQLContext, child *tipb.Executor) (*tipb.Executor, error) { + // Just use the default `vardef.DefGroupConcatMaxLen`, it only affects the AGG functions, so it doesn't matter here. + pc := expression.NewPushDownContext(ctx.GetEvalCtx(), distSQLCtx.Client, false, nil, nil, vardef.DefGroupConcatMaxLen) + // DDL will not push down to TiFlash currently, so we can just specify `kv.TiKV` here. + // If we want to support TiFlash in the future, we need to try to push down to both TiKV and TiFlash. + pushed, _ := expression.PushDownExprs(pc, []expression.Expression{expr}, kv.TiKV) + if len(pushed) == 0 { + // If no expression is pushed down, return nil to indicate that push down is not supported. + return nil, errors.New("cannot push down the selection expression") + } + + // As we have only one expression, the pushed expressions should be the same as the original expression. 
+ pbExpr, err := expression.ExpressionsToPBList(ctx.GetEvalCtx(), pushed, distSQLCtx.Client) + if err != nil { + return nil, err + } + + return &tipb.Executor{ + Tp: tipb.ExecType_TypeSelection, + Selection: &tipb.Selection{ + Conditions: pbExpr, + Child: child, + }, + }, nil +} + // ExtractDatumByOffsets is exported for test. func ExtractDatumByOffsets(ctx expression.EvalContext, row chunk.Row, offsets []int, expCols []*expression.Column, buf []types.Datum) []types.Datum { for i, offset := range offsets { diff --git a/pkg/ddl/index_cop_test.go b/pkg/ddl/index_cop_test.go index 87239a6121404..566b685537846 100644 --- a/pkg/ddl/index_cop_test.go +++ b/pkg/ddl/index_cop_test.go @@ -44,7 +44,7 @@ func TestAddIndexFetchRowsFromCoprocessor(t *testing.T) { idxInfo := tblInfo.FindIndexByName(idx) sctx := tk.Session() - copCtx, err := ddl.NewReorgCopContext(store, ddl.NewDDLReorgMeta(sctx), tblInfo, []*model.IndexInfo{idxInfo}, "") + copCtx, err := ddl.NewReorgCopContext(ddl.NewDDLReorgMeta(sctx), tblInfo, []*model.IndexInfo{idxInfo}, "") require.NoError(t, err) require.IsType(t, copCtx, &copr.CopContextSingleIndex{}) startKey := tbl.RecordPrefix() diff --git a/pkg/ddl/index_merge_tmp.go b/pkg/ddl/index_merge_tmp.go index 9f667ebb66a48..2cad8d943806e 100644 --- a/pkg/ddl/index_merge_tmp.go +++ b/pkg/ddl/index_merge_tmp.go @@ -142,18 +142,26 @@ type mergeIndexWorker struct { currentIndex *model.IndexInfo } -func newMergeTempIndexWorker(bfCtx *backfillCtx, t table.PhysicalTable, elements []*meta.Element) *mergeIndexWorker { +func newMergeTempIndexWorker(bfCtx *backfillCtx, t table.PhysicalTable, elements []*meta.Element) (*mergeIndexWorker, error) { allIndexes := make([]table.Index, 0, len(elements)) for _, elem := range elements { indexInfo := model.FindIndexInfoByID(t.Meta().Indices, elem.ID) - index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) + index, err := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) + if err != nil { + return nil, err + } 
allIndexes = append(allIndexes, index) } return &mergeIndexWorker{ backfillCtx: bfCtx, indexes: allIndexes, +<<<<<<< HEAD } +======= + buffers: newTempIdxBuffers(bfCtx.batchCnt), + }, nil +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) } func (w *mergeIndexWorker) setCurrentIndexForRange(taskRange *reorgBackfillTask) (err error) { diff --git a/pkg/ddl/index_presplit.go b/pkg/ddl/index_presplit.go new file mode 100644 index 0000000000000..57c8bfc51efed --- /dev/null +++ b/pkg/ddl/index_presplit.go @@ -0,0 +1,429 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ddl + +import ( + "bytes" + "context" + "fmt" + "math" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/tidb/pkg/ddl/logutil" + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/expression/exprctx" + "github.com/pingcap/tidb/pkg/kv" + "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/sessionctx" + "github.com/pingcap/tidb/pkg/table/tables" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util" + "github.com/pingcap/tidb/pkg/util/chunk" + contextutil "github.com/pingcap/tidb/pkg/util/context" + "github.com/pingcap/tidb/pkg/util/dbterror/exeerrors" + "github.com/pingcap/tidb/pkg/util/dbterror/plannererrors" + "go.uber.org/zap" +) + +func preSplitIndexRegions( + ctx context.Context, + sctx sessionctx.Context, + store kv.Storage, + tblInfo *model.TableInfo, + allIndexInfos []*model.IndexInfo, + reorgMeta *model.DDLReorgMeta, + args *model.ModifyIndexArgs, +) error { + warnHandler := contextutil.NewStaticWarnHandler(0) + exprCtx, err := newReorgExprCtxWithReorgMeta(reorgMeta, warnHandler) + if err != nil { + return errors.Trace(err) + } + splitOnTempIdx := reorgMeta.ReorgTp == model.ReorgTypeIngest || + reorgMeta.ReorgTp == model.ReorgTypeTxnMerge + for i, idxInfo := range allIndexInfos { + idxArg := args.IndexArgs[i] + splitArgs, err := evalSplitDatumFromArgs(exprCtx, tblInfo, idxInfo, idxArg) + if err != nil { + return errors.Trace(err) + } + if splitArgs == nil { + continue + } + splitKeys, err := getSplitIdxKeys(sctx, tblInfo, idxInfo, splitArgs) + if err != nil { + return errors.Trace(err) + } + if splitOnTempIdx { + for i := range splitKeys { + tablecodec.IndexKey2TempIndexKey(splitKeys[i]) + } + } + failpoint.InjectCall("beforePresplitIndex", splitKeys) + err = splitIndexRegionAndWait(ctx, sctx, store, tblInfo, idxInfo, splitKeys) + if err != nil { + return errors.Trace(err) + } + } + return nil +} + 
+type splitArgs struct { + byRows [][]types.Datum + + betweenLower []types.Datum + betweenUpper []types.Datum + regionsCnt int +} + +func getSplitIdxKeys( + sctx sessionctx.Context, + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, + args *splitArgs, +) ([][]byte, error) { + // Split index regions by user specified value lists. + if len(args.byRows) > 0 { + return getSplitIdxKeysFromValueList(sctx, tblInfo, idxInfo, args.byRows) + } + + return getSplitIdxKeysFromBound( + sctx, tblInfo, idxInfo, args.betweenLower, args.betweenUpper, args.regionsCnt) +} + +func getSplitIdxKeysFromValueList( + sctx sessionctx.Context, + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, + byRows [][]types.Datum, +) (destKeys [][]byte, err error) { + pi := tblInfo.GetPartitionInfo() + if pi == nil { + destKeys = make([][]byte, 0, len(byRows)+1) + return getSplitIdxPhysicalKeysFromValueList(sctx, tblInfo, idxInfo, tblInfo.ID, byRows, destKeys) + } + + if idxInfo.Global { + destKeys = make([][]byte, 0, len(byRows)+1) + return getSplitIdxPhysicalKeysFromValueList(sctx, tblInfo, idxInfo, tblInfo.ID, byRows, destKeys) + } + + destKeys = make([][]byte, 0, (len(byRows)+1)*len(pi.Definitions)) + for _, p := range pi.Definitions { + destKeys, err = getSplitIdxPhysicalKeysFromValueList(sctx, tblInfo, idxInfo, p.ID, byRows, destKeys) + if err != nil { + return nil, err + } + } + return destKeys, nil +} + +func getSplitIdxPhysicalKeysFromValueList( + sctx sessionctx.Context, + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, + physicalID int64, + splitDatum [][]types.Datum, + destKeys [][]byte, +) ([][]byte, error) { + destKeys = getSplitIdxPhysicalStartAndOtherIdxKeys(tblInfo, idxInfo, physicalID, destKeys) + index, err := tables.NewIndex(physicalID, tblInfo, idxInfo) + if err != nil { + return nil, err + } + sc := sctx.GetSessionVars().StmtCtx + for _, v := range splitDatum { + idxKey, _, err := index.GenIndexKey(sc.ErrCtx(), sc.TimeZone(), v, kv.IntHandle(math.MinInt64), nil) + if 
err != nil { + return nil, err + } + destKeys = append(destKeys, idxKey) + } + return destKeys, nil +} + +func getSplitIdxPhysicalStartAndOtherIdxKeys( + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, + physicalID int64, + keys [][]byte, +) [][]byte { + // 1. Split in the start key for the index if the index is not the first index. + // For the first index, splitting the start key can produce the region [tid, tid_i_1), which is useless. + if len(tblInfo.Indices) > 0 && tblInfo.Indices[0].ID != idxInfo.ID { + startKey := tablecodec.EncodeTableIndexPrefix(physicalID, idxInfo.ID) + keys = append(keys, startKey) + } + + // 2. Split in the end key. + endKey := tablecodec.EncodeTableIndexPrefix(physicalID, idxInfo.ID+1) + keys = append(keys, endKey) + return keys +} + +func getSplitIdxKeysFromBound( + sctx sessionctx.Context, + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, + lower, upper []types.Datum, + splitNum int, +) (keys [][]byte, err error) { + pi := tblInfo.GetPartitionInfo() + if pi == nil { + keys = make([][]byte, 0, splitNum) + return getSplitIdxPhysicalKeysFromBound( + sctx, tblInfo, idxInfo, tblInfo.ID, lower, upper, splitNum, keys) + } + keys = make([][]byte, 0, splitNum*len(pi.Definitions)) + for _, p := range pi.Definitions { + keys, err = getSplitIdxPhysicalKeysFromBound( + sctx, tblInfo, idxInfo, p.ID, lower, upper, splitNum, keys) + if err != nil { + return nil, err + } + } + return keys, nil +} + +func getSplitIdxPhysicalKeysFromBound( + sctx sessionctx.Context, + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, + physicalID int64, + lower, upper []types.Datum, + splitNum int, + destKeys [][]byte, +) ([][]byte, error) { + destKeys = getSplitIdxPhysicalStartAndOtherIdxKeys(tblInfo, idxInfo, physicalID, destKeys) + index, err := tables.NewIndex(physicalID, tblInfo, idxInfo) + if err != nil { + return nil, err + } + // Split index regions by lower, upper value and calculate the step by (upper - lower)/num. 
+ sc := sctx.GetSessionVars().StmtCtx + lowerIdxKey, _, err := index.GenIndexKey(sc.ErrCtx(), sc.TimeZone(), lower, kv.IntHandle(math.MinInt64), nil) + if err != nil { + return nil, err + } + // Use math.MinInt64 as handle_id for the upper index key to avoid affecting calculate split point. + // If use math.MaxInt64 here, test of `TestSplitIndex` will report error. + upperIdxKey, _, err := index.GenIndexKey(sc.ErrCtx(), sc.TimeZone(), upper, kv.IntHandle(math.MinInt64), nil) + if err != nil { + return nil, err + } + + if bytes.Compare(lowerIdxKey, upperIdxKey) >= 0 { + lowerStr := datumSliceToString(lower) + upperStr := datumSliceToString(upper) + errMsg := fmt.Sprintf("Split index `%v` region lower value %v should less than the upper value %v", + idxInfo.Name, lowerStr, upperStr) + return nil, exeerrors.ErrInvalidSplitRegionRanges.GenWithStackByArgs(errMsg) + } + return util.GetValuesList(lowerIdxKey, upperIdxKey, splitNum, destKeys), nil +} + +func datumSliceToString(ds []types.Datum) string { + str := "(" + for i, d := range ds { + s, err := d.ToString() + if err != nil { + return fmt.Sprintf("%v", ds) + } + if i > 0 { + str += "," + } + str += s + } + str += ")" + return str +} + +func splitIndexRegionAndWait( + ctx context.Context, + sctx sessionctx.Context, + store kv.Storage, + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, + splitIdxKeys [][]byte, +) error { + s, ok := store.(kv.SplittableStore) + if !ok { + return nil + } + start := time.Now() + ctxWithTimeout, cancel := context.WithTimeout(ctx, sctx.GetSessionVars().GetSplitRegionTimeout()) + defer cancel() + regionIDs, err := s.SplitRegions(ctxWithTimeout, splitIdxKeys, true, &tblInfo.ID) + if err != nil { + logutil.DDLLogger().Error("split table index region failed", + zap.String("table", tblInfo.Name.L), + zap.String("index", idxInfo.Name.L), + zap.Error(err)) + return err + } + failpoint.Inject("mockSplitIndexRegionAndWaitErr", func(_ failpoint.Value) { +
failpoint.Return(context.DeadlineExceeded) + }) + finishScatterRegions := waitScatterRegionFinish(ctxWithTimeout, sctx, start, s, regionIDs, tblInfo.Name.L, idxInfo.Name.L) + logutil.DDLLogger().Info("split table index region finished", + zap.String("table", tblInfo.Name.L), + zap.String("index", idxInfo.Name.L), + zap.Int("splitRegions", len(regionIDs)), + zap.Int("scatterRegions", finishScatterRegions), + ) + return nil +} + +func evalSplitDatumFromArgs( + buildCtx exprctx.BuildContext, + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, + idxArg *model.IndexArg, +) (*splitArgs, error) { + opt := idxArg.SplitOpt + if opt == nil { + return nil, nil + } + if len(opt.ValueLists) > 0 { + indexValues := make([][]types.Datum, 0, len(opt.ValueLists)) + for i, valueList := range opt.ValueLists { + if len(valueList) > len(idxInfo.Columns) { + return nil, plannererrors.ErrWrongValueCountOnRow.GenWithStackByArgs(i + 1) + } + values, err := evalConstExprNodes(buildCtx, valueList, tblInfo, idxInfo) + if err != nil { + return nil, err + } + indexValues = append(indexValues, values) + } + return &splitArgs{byRows: indexValues}, nil + } + + if len(opt.Lower) == 0 && len(opt.Upper) == 0 && opt.Num > 0 { + lowerVals := make([]types.Datum, 0, len(idxInfo.Columns)) + upperVals := make([]types.Datum, 0, len(idxInfo.Columns)) + for range idxInfo.Columns { + lowerVals = append(lowerVals, types.MinNotNullDatum()) + upperVals = append(upperVals, types.MaxValueDatum()) + } + return &splitArgs{ + betweenLower: lowerVals, + betweenUpper: upperVals, + regionsCnt: int(opt.Num), + }, nil + } + + // Split index regions by lower, upper value. 
+ checkLowerUpperValue := func(valuesItem []string, name string) ([]types.Datum, error) { + if len(valuesItem) == 0 { + return nil, errors.Errorf("Split index `%v` region %s value count should be greater than 0", idxInfo.Name, name) + } + if len(valuesItem) > len(idxInfo.Columns) { + return nil, errors.Errorf("Split index `%v` region column count doesn't match value count at %v", idxInfo.Name, name) + } + return evalConstExprNodes(buildCtx, valuesItem, tblInfo, idxInfo) + } + lowerValues, err := checkLowerUpperValue(opt.Lower, "lower") + if err != nil { + return nil, err + } + upperValues, err := checkLowerUpperValue(opt.Upper, "upper") + if err != nil { + return nil, err + } + splitArgs := &splitArgs{ + betweenLower: lowerValues, + betweenUpper: upperValues, + } + splitArgs.regionsCnt = int(opt.Num) + return splitArgs, nil +} + +func evalConstExprNodes( + buildCtx exprctx.BuildContext, + valueList []string, + tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, +) ([]types.Datum, error) { + values := make([]types.Datum, 0, len(valueList)) + for j, value := range valueList { + colOffset := idxInfo.Columns[j].Offset + col := tblInfo.Columns[colOffset] + exp, err := expression.ParseSimpleExpr(buildCtx, value) + if err != nil { + return nil, err + } + evalCtx := buildCtx.GetEvalCtx() + evaluatedVal, err := exp.Eval(evalCtx, chunk.Row{}) + if err != nil { + return nil, err + } + + d, err := evaluatedVal.ConvertTo(evalCtx.TypeCtx(), &col.FieldType) + if err != nil { + if !types.ErrTruncated.Equal(err) && + !types.ErrTruncatedWrongVal.Equal(err) && + !types.ErrBadNumber.Equal(err) { + return nil, err + } + valStr, err1 := evaluatedVal.ToString() + if err1 != nil { + return nil, err + } + return nil, types.ErrTruncated.GenWithStack("Incorrect value: '%-.128s' for column '%.192s'", valStr, col.Name.O) + } + values = append(values, d) + } + return values, nil +} + +func waitScatterRegionFinish( + ctxWithTimeout context.Context, + sctx sessionctx.Context, + startTime 
time.Time, + store kv.SplittableStore, + regionIDs []uint64, + tableName, indexName string, +) int { + remainMillisecond := 0 + finishScatterNum := 0 + for _, regionID := range regionIDs { + select { + case <-ctxWithTimeout.Done(): + // Do not break here for checking remain regions scatter finished with a very short backoff time. + // Consider this situation - Regions 1, 2, and 3 are to be split. + // Region 1 times out before scattering finishes, while Region 2 and Region 3 have finished scattering. + // In this case, we should return 2 Regions, instead of 0, have finished scattering. + remainMillisecond = 50 + default: + remainMillisecond = int((sctx.GetSessionVars().GetSplitRegionTimeout().Seconds() - time.Since(startTime).Seconds()) * 1000) + } + + err := store.WaitScatterRegionFinish(ctxWithTimeout, regionID, remainMillisecond) + if err == nil { + finishScatterNum++ + } else { + logutil.DDLLogger().Warn("wait scatter region failed", + zap.Uint64("regionID", regionID), + zap.String("table", tableName), + zap.String("index", indexName), + zap.Error(err)) + } + } + return finishScatterNum +} diff --git a/pkg/ddl/integration_test.go b/pkg/ddl/integration_test.go index fe7337c7a1104..662d59c1a3e3c 100644 --- a/pkg/ddl/integration_test.go +++ b/pkg/ddl/integration_test.go @@ -53,3 +53,92 @@ func TestDDLStatementsBackFill(t *testing.T) { require.Equal(t, tc.expectedNeedReorg, needReorg, tc) } } +<<<<<<< HEAD +======= + +func TestPartialIndex(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test;") + + // test validate column exists in create table + tk.MustExec("create table t (a int, b int, key(b) where a = 1);") + tk.MustGetDBError("create table t1 (a int, b int, key(b) where c = 1);", + dbterror.ErrUnsupportedAddPartialIndex) + tk.MustExec("drop table t;") + + // test primary key is not allowed in partial index + tk.MustExec("create table t (a int, b int, key(b) where a = 1);") + 
tk.MustGetDBError("create table t2 (a int, b int, primary key(b) where a = 1);", + dbterror.ErrUnsupportedAddPartialIndex) + tk.MustExec("drop table t;") + + checkColumnTypes := func(columnTypes []string, literals []string, shouldAllowed bool) { + for _, columnType := range columnTypes { + for _, literal := range literals { + tk.MustExec("drop table if exists t;") + sql := fmt.Sprintf("create table t (a %s, b int, key(b) where a = %s);", columnType, literal) + if shouldAllowed { + tk.MustExec(sql) + tk.MustExec("drop table t;") + } else { + tk.MustGetDBError(sql, dbterror.ErrUnsupportedAddPartialIndex) + } + } + } + } + + // test create table type validation + differentTypeLiterals := [][]string{ + {"1", "true", "1998"}, // int + {"'1'"}, // string with default collate + {"1.0"}, // float + {"b'101010'", "0x1234567890abcdef", "0b10"}, // binary literal + {"null"}, // null + } + differentColumnTypes := [][]string{ + {"int", "bigint", "tinyint", "smallint", "year"}, + {"char(25)", "varchar(123)", "text", "char(25) collate utf8mb4_general_ci", "char(25) collate utf8mb4_bin"}, + {"float", "double"}, + {"binary(25) collate binary", "varbinary(123)", "blob", "char(25) collate binary"}, + {}, + } + for i, columnTypes := range differentColumnTypes { + for j, literals := range differentTypeLiterals { + checkColumnTypes(columnTypes, literals, i == j) + } + } + + // test comparing between time column and string constant is allowed. + timeColumnTypes := []string{"timestamp", "datetime", "date", "time"} + allowedLiterals := []string{"'2025-07-28 12:34:56'", "'2025-07-28'", "'12:34:56'"} + notAllowedLiterals := []string{"1", "1.0", "true", "null"} + checkColumnTypes(timeColumnTypes, allowedLiterals, true) + checkColumnTypes(timeColumnTypes, notAllowedLiterals, false) + + // test comparing between enum/set column and int/string constant is allowed. 
+ enumSetColumnTypes := []string{"enum('a', 'b', 'c')", "set('a', 'b', 'c')"} + allowedLiterals = []string{"1", "'1'", "'a'"} + notAllowedLiterals = []string{"1.0", "null"} + checkColumnTypes(enumSetColumnTypes, allowedLiterals, true) + checkColumnTypes(enumSetColumnTypes, notAllowedLiterals, false) + + // test alter table type validation + for i, literals := range differentTypeLiterals { + for _, literal := range literals { + for j, columnTypes := range differentColumnTypes { + tk.MustExec("drop table if exists t;") + for _, columnType := range columnTypes { + sql := fmt.Sprintf("create table t (a %s, b int, key idx_b(b) where a = %s);", columnType, literal) + if i == j { + tk.MustExec(sql) + tk.MustExec("drop table t;") + } else { + tk.MustGetDBError(sql, dbterror.ErrUnsupportedAddPartialIndex) + } + } + } + } + } +} +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) diff --git a/pkg/ddl/modify_column.go b/pkg/ddl/modify_column.go index c14010407d313..7b72310499d62 100644 --- a/pkg/ddl/modify_column.go +++ b/pkg/ddl/modify_column.go @@ -308,6 +308,11 @@ func (w *worker) onModifyColumn(jobCtx *jobContext, job *model.Job) (ver int64, return ver, errors.Trace(err) } + if err = checkColumnReferencedByPartialCondition(tblInfo, args.Column.Name); err != nil { + job.State = model.JobStateCancelled + return ver, errors.Trace(err) + } + if args.ChangingColumn == nil { if err := checkColumnAlreadyExists(tblInfo, args); err != nil { job.State = model.JobStateCancelled @@ -1464,6 +1469,10 @@ func GetModifiableColumnJob( if col == nil { return nil, infoschema.ErrColumnNotExists.GenWithStackByArgs(originalColName, ident.Name) } + err = checkColumnReferencedByPartialCondition(t.Meta(), col.ColumnInfo.Name) + if err != nil { + return nil, errors.Trace(err) + } newColName := specNewColumn.Name.Name if newColName.L == model.ExtraHandleName.L { return nil, dbterror.ErrWrongColumnName.GenWithStackByArgs(newColName.L) diff --git 
a/pkg/ddl/partition.go b/pkg/ddl/partition.go index acd84de33d942..8f502fd5ab7e5 100644 --- a/pkg/ddl/partition.go +++ b/pkg/ddl/partition.go @@ -653,6 +653,10 @@ func buildTablePartitionInfo(ctx *metabuild.Context, s *ast.PartitionOptions, tb } } } + + if index.HasCondition() { + return dbterror.ErrUnsupportedAddPartialIndex.GenWithStackByArgs("partial index is not supported on partitioned table") + } } if tbInfo.PKIsHandle { // This case is covers when the Handle is the PK (only ints), since it would not diff --git a/pkg/errno/errcode.go b/pkg/errno/errcode.go index 5efede23a3072..02caf736fdf75 100644 --- a/pkg/errno/errcode.go +++ b/pkg/errno/errcode.go @@ -1164,6 +1164,15 @@ const ( ErrResourceGroupInvalidBackgroundTaskName = 8255 ErrResourceGroupInvalidForRole = 8257 +<<<<<<< HEAD +======= + // Reserved for future use. + ErrEngineAttributeInvalidFormat = 8270 + ErrStorageClassInvalidSpec = 8271 + ErrModifyColumnReferencedByPartialCondition = 8272 + ErrCheckPartialIndexWithoutFastCheck = 8273 + +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) // TiKV/PD/TiFlash errors. 
ErrPDServerTimeout = 9001 ErrTiKVServerTimeout = 9002 diff --git a/pkg/errno/errname.go b/pkg/errno/errname.go index bd1299cd9e571..1c1f430b6b669 100644 --- a/pkg/errno/errname.go +++ b/pkg/errno/errname.go @@ -1156,6 +1156,15 @@ var MySQLErrName = map[uint16]*mysql.ErrMessage{ ErrResourceGroupQueryRunawayInterrupted: mysql.Message("Query execution was interrupted, identified as runaway query [%s]", nil), ErrResourceGroupQueryRunawayQuarantine: mysql.Message("Quarantined and interrupted because of being in runaway watch list", nil), ErrResourceGroupInvalidBackgroundTaskName: mysql.Message("Unknown background task name '%-.192s'", nil), +<<<<<<< HEAD +======= + ErrQueryExecStopped: mysql.Message("Query execution was stopped by the global memory arbitrator [reason=%s] [conn=%d]", nil), + + ErrEngineAttributeInvalidFormat: mysql.Message("Invalid engine attribute format: %s", nil), + ErrStorageClassInvalidSpec: mysql.Message("Invalid storage class: %s", nil), + ErrModifyColumnReferencedByPartialCondition: mysql.Message("Cannot drop, change or modify column '%s': it is referenced in partial index '%s'", nil), + ErrCheckPartialIndexWithoutFastCheck: mysql.Message("Validation of partial indexes requires tidb_enable_fast_table_check=ON", nil), +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) // TiKV/PD errors. 
ErrPDServerTimeout: mysql.Message("PD server timeout: %s", nil), diff --git a/pkg/executor/builder.go b/pkg/executor/builder.go index 78d4543c35cf2..a87b69afb4705 100644 --- a/pkg/executor/builder.go +++ b/pkg/executor/builder.go @@ -507,14 +507,22 @@ func buildIndexLookUpChecker(b *executorBuilder, p *plannercore.PhysicalIndexLoo } func (b *executorBuilder) buildCheckTable(v *plannercore.CheckTable) exec.Executor { +<<<<<<< HEAD noMVIndexOrPrefixIndexOrVectorIndex := true for _, idx := range v.IndexInfos { if idx.MVIndex || idx.VectorInfo != nil { noMVIndexOrPrefixIndexOrVectorIndex = false +======= + canUseFastCheck := true + for _, idx := range v.IndexInfos { + if idx.MVIndex || idx.IsColumnarIndex() { + canUseFastCheck = false +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) break } for _, col := range idx.Columns { if col.Length != types.UnspecifiedLength { +<<<<<<< HEAD noMVIndexOrPrefixIndexOrVectorIndex = false break } @@ -524,6 +532,17 @@ func (b *executorBuilder) buildCheckTable(v *plannercore.CheckTable) exec.Execut } } if b.sctx.GetSessionVars().FastCheckTable && noMVIndexOrPrefixIndexOrVectorIndex { +======= + canUseFastCheck = false + break + } + } + if !canUseFastCheck { + break + } + } + if b.ctx.GetSessionVars().FastCheckTable && canUseFastCheck { +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) e := &FastCheckTableExec{ BaseExecutor: exec.NewBaseExecutor(b.sctx, v.Schema(), v.ID()), dbName: v.DBName, diff --git a/pkg/executor/check_table_index.go b/pkg/executor/check_table_index.go index 63b6d4fc154a6..92d00e037f305 100644 --- a/pkg/executor/check_table_index.go +++ b/pkg/executor/check_table_index.go @@ -22,8 +22,12 @@ import ( "strings" "github.com/pingcap/errors" +<<<<<<< HEAD "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/disttask/operator" +======= + "github.com/pingcap/tidb/pkg/errno" +>>>>>>> 8c2781681a4 (ddl,tables: only 
write the index when it meets partial index condition (#62762)) "github.com/pingcap/tidb/pkg/executor/internal/exec" "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/kv" @@ -39,6 +43,11 @@ import ( "github.com/pingcap/tidb/pkg/util/admin" "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/codec" +<<<<<<< HEAD +======= + "github.com/pingcap/tidb/pkg/util/dbterror" + "github.com/pingcap/tidb/pkg/util/intest" +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) "github.com/pingcap/tidb/pkg/util/logutil" "github.com/pingcap/tidb/pkg/util/logutil/consistency" "github.com/pingcap/tidb/pkg/util/sqlexec" @@ -46,6 +55,8 @@ import ( "go.uber.org/zap" ) +var errCheckPartialIndexWithoutFastCheck = dbterror.ClassExecutor.NewStd(errno.ErrCheckPartialIndexWithoutFastCheck) + // CheckTableExec represents a check table executor. // It is built from the "admin check table" statement, and it checks if the // index matches the records in the table. 
@@ -144,6 +155,9 @@ func (e *CheckTableExec) Next(ctx context.Context, _ *chunk.Chunk) error { if idx.MVIndex || idx.VectorInfo != nil { continue } + if idx.HasCondition() { + return errors.Trace(errCheckPartialIndexWithoutFastCheck) + } idxNames = append(idxNames, idx.Name.O) } greater, idxOffset, err := admin.CheckIndicesCount(e.Ctx(), e.dbName, e.table.Meta().Name.O, idxNames) @@ -226,7 +240,10 @@ func (e *CheckTableExec) checkTableRecord(ctx context.Context, idxOffset int) er return err } if e.table.Meta().GetPartitionInfo() == nil { - idx := tables.NewIndex(e.table.Meta().ID, e.table.Meta(), idxInfo) + idx, err := tables.NewIndex(e.table.Meta().ID, e.table.Meta(), idxInfo) + if err != nil { + return err + } return admin.CheckRecordAndIndex(ctx, e.Ctx(), txn, e.table, idx) } @@ -234,7 +251,10 @@ func (e *CheckTableExec) checkTableRecord(ctx context.Context, idxOffset int) er for _, def := range info.Definitions { pid := def.ID partition := e.table.(table.PartitionedTable).GetPartition(pid) - idx := tables.NewIndex(def.ID, e.table.Meta(), idxInfo) + idx, err := tables.NewIndex(def.ID, e.table.Meta(), idxInfo) + if err != nil { + return err + } if err := admin.CheckRecordAndIndex(ctx, e.Ctx(), txn, partition, idx); err != nil { return errors.Trace(err) } @@ -336,6 +356,42 @@ func (w *checkIndexWorker) initSessCtx(se sessionctx.Context) (restore func()) { } } +<<<<<<< HEAD +======= +func queryToRow(ctx context.Context, se sessionctx.Context, sql string) ([]chunk.Row, error) { + rs, err := se.GetSQLExecutor().ExecuteInternal(ctx, sql) + if err != nil { + return nil, err + } + return sqlexec.DrainRecordSetAndClose(ctx, rs, 4096) +} + +func verifyIndexSideQuery(ctx context.Context, se sessionctx.Context, sql string) bool { + rows, err := queryToRow(ctx, se, "explain "+sql) + if err != nil { + return false + } + + isTableScan := false + isIndexScan := false + for _, row := range rows { + op := row.GetString(0) + if strings.Contains(op, "TableFullScan") { + isTableScan 
= true + } else if strings.Contains(op, "IndexFullScan") || strings.Contains(op, "IndexRangeScan") { + // It's also possible to be index range scan if the index has condition. + // TODO: if the planner eliminates the range in the index scan, we can remove the `IndexRangeScan` check. + isIndexScan = true + } + } + + if isTableScan || !isIndexScan { + return false + } + return true +} + +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) // HandleTask implements the Worker interface. func (w *checkIndexWorker) HandleTask(task checkIndexTask, _ func(workerpool.None)) error { failpoint.Inject("mockFastCheckTableError", func() { @@ -447,9 +503,22 @@ func (w *checkIndexWorker) HandleTask(task checkIndexTask, _ func(workerpool.Non whereKey = "0" } checkOnce = true + idxCondition := "" + if idxInfo.HasCondition() { + idxCondition = fmt.Sprintf("(%s) AND ", idxInfo.ConditionExprString) + } +<<<<<<< HEAD tblQuery := fmt.Sprintf("select /*+ read_from_storage(tikv[%s]) */ bit_xor(%s), %s, count(*) from %s use index() where %s = 0 group by %s", TableName(w.e.dbName, w.e.table.Meta().Name.String()), md5HandleAndIndexCol.String(), groupByKey, TableName(w.e.dbName, w.e.table.Meta().Name.String()), whereKey, groupByKey) idxQuery := fmt.Sprintf("select bit_xor(%s), %s, count(*) from %s use index(`%s`) where %s = 0 group by %s", md5HandleAndIndexCol.String(), groupByKey, TableName(w.e.dbName, w.e.table.Meta().Name.String()), idxInfo.Name, whereKey, groupByKey) +======= + tblQuery := fmt.Sprintf( + "select /*+ read_from_storage(tikv[%s]), AGG_TO_COP() */ bit_xor(%s), %s, count(*) from %s use index() where %s(%s = 0) group by %s", + tblName, md5HandleAndIndexCol, groupByKey, tblName, idxCondition, whereKey, groupByKey) + idxQuery := fmt.Sprintf( + "select /*+ AGG_TO_COP() */ bit_xor(%s), %s, count(*) from %s use index(`%s`) where %s (%s = 0) group by%s", + md5HandleAndIndexCol, groupByKey, tblName, idxInfo.Name, idxCondition, whereKey, 
groupByKey) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) logutil.BgLogger().Info("fast check table by group", zap.String("table name", w.table.Meta().Name.String()), zap.String("index name", idxInfo.Name.String()), zap.Int("times", times), zap.Int("current offset", offset), zap.Int("current mod", mod), zap.String("table sql", tblQuery), zap.String("index sql", idxQuery)) @@ -514,8 +583,27 @@ func (w *checkIndexWorker) HandleTask(task checkIndexTask, _ func(workerpool.Non mod *= bucketSize } +<<<<<<< HEAD queryToRow := func(se sessionctx.Context, sql string) ([]chunk.Row, error) { rs, err := se.GetSQLExecutor().ExecuteInternal(ctx, sql) +======= + if meetError { + idxCondition := "" + if idxInfo.HasCondition() { + idxCondition = fmt.Sprintf("(%s) AND ", idxInfo.ConditionExprString) + } + groupByKey := fmt.Sprintf("((cast(%s as signed) - %d) %% %d)", md5Handle, offset, mod) + indexSQL := fmt.Sprintf( + "select /*+ AGG_TO_COP() */ %s, %s, %s from %s use index(`%s`) where %s(%s = 0) order by %s", + handleColumns, indexColumns, md5HandleAndIndexCol, tblName, idxInfo.Name, idxCondition, groupByKey, handleColumns) + tableSQL := fmt.Sprintf( + "select /*+ read_from_storage(tikv[%s]), AGG_TO_COP() */ %s, %s, %s from %s use index() where %s(%s = 0) order by %s", + tblName, handleColumns, indexColumns, md5HandleAndIndexCol, tblName, idxCondition, groupByKey, handleColumns) + intest.AssertFunc(func() bool { + return verifyIndexSideQuery(ctx, se, indexSQL) + }, "index side query plan is not correct: %s", indexSQL) + idxRow, err := queryToRow(ctx, se, indexSQL) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) if err != nil { return nil, err } diff --git a/pkg/executor/distsql_test.go b/pkg/executor/distsql_test.go index e5f1a461241ab..9e9b4d4aefcd5 100644 --- a/pkg/executor/distsql_test.go +++ b/pkg/executor/distsql_test.go @@ -168,7 +168,8 @@ func 
TestInconsistentIndex(t *testing.T) { tbl, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t")) require.NoError(t, err) idx := tbl.Meta().FindIndexByName("idx_a") - idxOp := tables.NewIndex(tbl.Meta().ID, tbl.Meta(), idx) + idxOp, err := tables.NewIndex(tbl.Meta().ID, tbl.Meta(), idx) + require.NoError(t, err) ctx := mock.NewContext() ctx.Store = store diff --git a/pkg/executor/split.go b/pkg/executor/split.go index b62ece62af582..24353d6d8701d 100644 --- a/pkg/executor/split.go +++ b/pkg/executor/split.go @@ -164,7 +164,10 @@ func (e *SplitIndexRegionExec) getSplitIdxKeysFromValueList() (keys [][]byte, er func (e *SplitIndexRegionExec) getSplitIdxPhysicalKeysFromValueList(physicalID int64, keys [][]byte) ([][]byte, error) { keys = e.getSplitIdxPhysicalStartAndOtherIdxKeys(physicalID, keys) - index := tables.NewIndex(physicalID, e.tableInfo, e.indexInfo) + index, err := tables.NewIndex(physicalID, e.tableInfo, e.indexInfo) + if err != nil { + return nil, err + } sc := e.Ctx().GetSessionVars().StmtCtx for _, v := range e.valueLists { idxKey, _, err := index.GenIndexKey(sc.ErrCtx(), sc.TimeZone(), v, kv.IntHandle(math.MinInt64), nil) @@ -226,7 +229,10 @@ func (e *SplitIndexRegionExec) getSplitIdxKeysFromBound() (keys [][]byte, err er func (e *SplitIndexRegionExec) getSplitIdxPhysicalKeysFromBound(physicalID int64, keys [][]byte) ([][]byte, error) { keys = e.getSplitIdxPhysicalStartAndOtherIdxKeys(physicalID, keys) - index := tables.NewIndex(physicalID, e.tableInfo, e.indexInfo) + index, err := tables.NewIndex(physicalID, e.tableInfo, e.indexInfo) + if err != nil { + return nil, err + } // Split index regions by lower, upper value and calculate the step by (upper - lower)/num. 
sc := e.Ctx().GetSessionVars().StmtCtx lowerIdxKey, _, err := index.GenIndexKey(sc.ErrCtx(), sc.TimeZone(), e.lower, kv.IntHandle(math.MinInt64), nil) diff --git a/pkg/executor/split_test.go b/pkg/executor/split_test.go index dfad5584e1d19..aa401551052cd 100644 --- a/pkg/executor/split_test.go +++ b/pkg/executor/split_test.go @@ -161,7 +161,8 @@ func TestSplitIndex(t *testing.T) { {1000, 9}, } - index := tables.NewIndex(tbInfo.ID, tbInfo, idxInfo) + index, err := tables.NewIndex(tbInfo.ID, tbInfo, idxInfo) + require.NoError(t, err) for _, ca := range cases { // test for minInt64 handle sc := ctx.GetSessionVars().StmtCtx diff --git a/pkg/executor/test/admintest/admin_test.go b/pkg/executor/test/admintest/admin_test.go index f4e4627bc5a94..0e983affb1fd1 100644 --- a/pkg/executor/test/admintest/admin_test.go +++ b/pkg/executor/test/admintest/admin_test.go @@ -94,7 +94,9 @@ func TestAdminRecoverIndex(t *testing.T) { tblInfo := tbl.Meta() idxInfo := tblInfo.FindIndexByName("c2") - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) err = indexOpr.Delete(ctx, txn, types.MakeDatums(1), kv.IntHandle(1)) @@ -184,7 +186,8 @@ func TestAdminRecoverIndex(t *testing.T) { tblInfo = tbl.Meta() idxInfo = tblInfo.FindIndexByName("i1") - indexOpr = tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err = tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) txn, err = store.Begin() require.NoError(t, err) err = indexOpr.Delete(ctx, txn, types.MakeDatums(2), kv.IntHandle(1)) @@ -234,7 +237,9 @@ func TestAdminRecoverMVIndex(t *testing.T) { cpIdx := idxInfo.Clone() cpIdx.MVIndex = false - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, cpIdx) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, cpIdx) + require.NoError(t, err) + require.NoError(t, err) txn, err := store.Begin() 
require.NoError(t, err) @@ -279,7 +284,8 @@ func TestAdminCleanupMVIndex(t *testing.T) { cpIdx := idxInfo.Clone() cpIdx.MVIndex = false - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, cpIdx) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, cpIdx) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) @@ -321,7 +327,8 @@ func TestClusteredIndexAdminRecoverIndex(t *testing.T) { require.NoError(t, err) tblInfo := tbl.Meta() idxInfo := tblInfo.FindIndexByName("idx") - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) // Some index entries are missed. // Recover an index don't covered by clustered index. @@ -342,7 +349,8 @@ func TestClusteredIndexAdminRecoverIndex(t *testing.T) { // Recover an index covered by clustered index. idx1Info := tblInfo.FindIndexByName("idx1") - indexOpr1 := tables.NewIndex(tblInfo.ID, tblInfo, idx1Info) + indexOpr1, err := tables.NewIndex(tblInfo.ID, tblInfo, idx1Info) + require.NoError(t, err) txn, err = store.Begin() require.NoError(t, err) err = indexOpr1.Delete(ctx, txn, types.MakeDatums("3"), cHandle) @@ -376,7 +384,8 @@ func TestAdminRecoverPartitionTableIndex(t *testing.T) { checkFunc := func(tbl table.Table, pid int64, idxValue int) { idxInfo := tbl.Meta().FindIndexByName("c2") - indexOpr := tables.NewIndex(pid, tbl.Meta(), idxInfo) + indexOpr, err := tables.NewIndex(pid, tbl.Meta(), idxInfo) + require.NoError(t, err) ctx := mock.NewContext() txn, err := store.Begin() require.NoError(t, err) @@ -454,7 +463,8 @@ func TestAdminRecoverIndex1(t *testing.T) { tblInfo := tbl.Meta() idxInfo := tblInfo.FindIndexByName("primary") require.NotNil(t, idxInfo) - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) @@ -513,9 +523,11 @@ func TestAdminCleanupIndex(t *testing.T) { 
tblInfo := tbl.Meta() idxInfo2 := tblInfo.FindIndexByName("c2") - indexOpr2 := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo2) + indexOpr2, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo2) + require.NoError(t, err) idxInfo3 := tblInfo.FindIndexByName("c3") - indexOpr3 := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo3) + indexOpr3, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo3) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) @@ -584,9 +596,11 @@ func TestAdminCleanupIndexForPartitionTable(t *testing.T) { checkFunc := func(tbl table.Table, pid int64, idxValue, handle int) { idxInfo2 := tbl.Meta().FindIndexByName("c2") - indexOpr2 := tables.NewIndex(pid, tbl.Meta(), idxInfo2) + indexOpr2, err := tables.NewIndex(pid, tbl.Meta(), idxInfo2) + require.NoError(t, err) idxInfo3 := tbl.Meta().FindIndexByName("c3") - indexOpr3 := tables.NewIndex(pid, tbl.Meta(), idxInfo3) + indexOpr3, err := tables.NewIndex(pid, tbl.Meta(), idxInfo3) + require.NoError(t, err) txn, err := store.Begin() sctx := mock.NewContext() @@ -673,7 +687,8 @@ func TestAdminCleanupIndexPKNotHandle(t *testing.T) { tblInfo := tbl.Meta() idxInfo := tblInfo.FindIndexByName("primary") - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) @@ -724,9 +739,11 @@ func TestAdminCleanupIndexMore(t *testing.T) { tblInfo := tbl.Meta() idxInfo1 := tblInfo.FindIndexByName("c1") - indexOpr1 := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo1) + indexOpr1, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo1) + require.NoError(t, err) idxInfo2 := tblInfo.FindIndexByName("c2") - indexOpr2 := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo2) + indexOpr2, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo2) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) @@ -792,9 +809,11 @@ func TestClusteredAdminCleanupIndex(t 
*testing.T) { tblInfo := tbl.Meta() idxInfo2 := tblInfo.FindIndexByName("c2") - indexOpr2 := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo2) + indexOpr2, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo2) + require.NoError(t, err) idxInfo3 := tblInfo.FindIndexByName("c3") - indexOpr3 := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo3) + indexOpr3, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo3) + require.NoError(t, err) c2DanglingIdx := []struct { handle kv.Handle @@ -875,7 +894,8 @@ func TestAdminCheckTableWithMultiValuedIndex(t *testing.T) { cpIdx := idxInfo.Clone() cpIdx.MVIndex = false - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, cpIdx) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, cpIdx) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) err = indexOpr.Delete(ctx, txn, types.MakeDatums(0), kv.IntHandle(0)) @@ -932,7 +952,8 @@ func TestAdminCheckPartitionTableFailed(t *testing.T) { // Table count > index count. for i := 0; i <= 5; i++ { partitionIdx := i % len(tblInfo.GetPartitionInfo().Definitions) - indexOpr := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[partitionIdx].ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[partitionIdx].ID, tblInfo, idxInfo) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) err = indexOpr.Delete(ctx, txn, types.MakeDatums(i), kv.IntHandle(i)) @@ -961,7 +982,8 @@ func TestAdminCheckPartitionTableFailed(t *testing.T) { // Table count < index count. 
for i := 0; i <= 5; i++ { partitionIdx := i % len(tblInfo.GetPartitionInfo().Definitions) - indexOpr := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[partitionIdx].ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[partitionIdx].ID, tblInfo, idxInfo) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) _, err = indexOpr.Create(ctx, txn, types.MakeDatums(i+8), kv.IntHandle(i+8), nil) @@ -984,7 +1006,8 @@ func TestAdminCheckPartitionTableFailed(t *testing.T) { // Table count = index count, but the index value was wrong. for i := 0; i <= 5; i++ { partitionIdx := i % len(tblInfo.GetPartitionInfo().Definitions) - indexOpr := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[partitionIdx].ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[partitionIdx].ID, tblInfo, idxInfo) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) _, err = indexOpr.Create(ctx, txn, types.MakeDatums(i+8), kv.IntHandle(i), nil) @@ -1054,8 +1077,10 @@ func (tk *inconsistencyTestKit) rebuild() { is := domain.GetDomain(testkit.TryRetrieveSession(tk.ctx)).InfoSchema() tbl, err := is.TableByName(context.Background(), pmodel.NewCIStr(dbName), pmodel.NewCIStr(tblName)) require.NoError(tk.t, err) - tk.uniqueIndex = tables.NewIndex(tbl.Meta().ID, tbl.Meta(), tbl.Meta().Indices[0]) - tk.plainIndex = tables.NewIndex(tbl.Meta().ID, tbl.Meta(), tbl.Meta().Indices[1]) + tk.uniqueIndex, err = tables.NewIndex(tbl.Meta().ID, tbl.Meta(), tbl.Meta().Indices[0]) + require.NoError(tk.t, err) + tk.plainIndex, err = tables.NewIndex(tbl.Meta().ID, tbl.Meta(), tbl.Meta().Indices[1]) + require.NoError(tk.t, err) } func TestCheckFailReport(t *testing.T) { @@ -1311,7 +1336,8 @@ func TestAdminCheckWithSnapshot(t *testing.T) { tblInfo := tbl.Meta() idxInfo := tblInfo.FindIndexByName("a") - idxOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + idxOpr, err := 
tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) _, err = idxOpr.Create(ctx, txn, types.MakeDatums(2), kv.IntHandle(100), nil) @@ -1369,7 +1395,8 @@ func TestAdminCheckTableFailed(t *testing.T) { require.NoError(t, err) tblInfo := tbl.Meta() idxInfo := tblInfo.Indices[1] - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) tk.Session().GetSessionVars().IndexLookupSize = 3 tk.Session().GetSessionVars().MaxChunkSize = 3 @@ -1536,7 +1563,8 @@ func TestAdminCheckTableErrorLocate(t *testing.T) { require.NoError(t, err) tblInfo := tbl.Meta() idxInfo := tblInfo.Indices[0] - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) return indexOpr } @@ -1702,7 +1730,8 @@ func TestAdminCheckTableErrorLocateForClusterIndex(t *testing.T) { require.NoError(t, err) tblInfo := tbl.Meta() idxInfo := tblInfo.Indices[0] - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) pattern := "handle:\\s(\\d+)" r := regexp.MustCompile(pattern) @@ -1884,7 +1913,8 @@ func TestAdminRecoverGlobalIndex(t *testing.T) { idx := tbl.Indices()[0] require.NotNil(t, idx) - indexOpr := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[2].ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[2].ID, tblInfo, idxInfo) + require.NoError(t, err) // Reduce one row of index. // Index count < table count, (-1, -10, 2) is deleted. 
@@ -1946,7 +1976,8 @@ func TestAdminCheckGlobalIndex(t *testing.T) { require.True(t, consistency.ErrAdminCheckInconsistent.Equal(err)) require.ErrorContains(t, err, "[admin:8223]data inconsistency in table: admin_test, index: uidx_a, handle: 4, index-values:\"handle: 4, values: [KindInt64 2") - indexOpr := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[0].ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[0].ID, tblInfo, idxInfo) + require.NoError(t, err) // Remove corresponding index key/value. // Admin check table will success. txn, err = store.Begin() @@ -1957,7 +1988,8 @@ func TestAdminCheckGlobalIndex(t *testing.T) { require.NoError(t, err) tk.MustExec("admin check table admin_test") - indexOpr = tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[2].ID, tblInfo, idxInfo) + indexOpr, err = tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[2].ID, tblInfo, idxInfo) + require.NoError(t, err) // Reduce one row of index. // Index count < table count, (-1, -10, 2) is deleted. @@ -2040,7 +2072,8 @@ func TestAdminCheckGlobalIndexWithClusterIndex(t *testing.T) { require.True(t, consistency.ErrAdminCheckInconsistent.Equal(err)) require.ErrorContains(t, err, "[admin:8223]data inconsistency in table: admin_test, index: uidx_a, handle: 0, index-values:\"handle: 0, values: [KindInt64 2") - indexOpr := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[0].ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[0].ID, tblInfo, idxInfo) + require.NoError(t, err) // Remove corresponding index key/value. // Admin check table will success. 
txn, err = store.Begin() @@ -2051,7 +2084,8 @@ func TestAdminCheckGlobalIndexWithClusterIndex(t *testing.T) { require.NoError(t, err) tk.MustExec("admin check table admin_test") - indexOpr = tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[2].ID, tblInfo, idxInfo) + indexOpr, err = tables.NewIndex(tblInfo.GetPartitionInfo().Definitions[2].ID, tblInfo, idxInfo) + require.NoError(t, err) // Reduce one row of index. // Index count < table count, (-1, -10, 2) is deleted. txn, err = store.Begin() @@ -2134,3 +2168,49 @@ func TestAdminCheckGlobalIndexDuringDDL(t *testing.T) { } } } +<<<<<<< HEAD +======= + +func TestAdminCheckGeneratedColumns(t *testing.T) { + store, domain := testkit.CreateMockStoreAndDomain(t) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("DROP TABLE IF EXISTS t") + tk.MustExec("CREATE TABLE t(pk int PRIMARY KEY CLUSTERED, val int, gen int GENERATED ALWAYS AS (val * pk) VIRTUAL, KEY idx_gen(gen))") + tk.MustExec("INSERT INTO t(pk, val) VALUES (2, 5)") + tk.MustExec("ADMIN CHECK TABLE t") + + // Make some corrupted index. Build the index information. 
+ sctx := mock.NewContext() + sctx.Store = store + ctx := sctx.GetTableCtx() + is := domain.InfoSchema() + dbName := ast.NewCIStr("test") + tblName := ast.NewCIStr("t") + tbl, err := is.TableByName(context.Background(), dbName, tblName) + require.NoError(t, err) + tblInfo := tbl.Meta() + idxInfo := tblInfo.Indices[0] + tk.Session().GetSessionVars().IndexLookupSize = 3 + tk.Session().GetSessionVars().MaxChunkSize = 3 + + // Simulate inconsistent index column + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) + txn, err := store.Begin() + require.NoError(t, err) + err = indexOpr.Delete(ctx, txn, types.MakeDatums(10), kv.IntHandle(2)) + require.NoError(t, err) + _, err = indexOpr.Create(ctx, txn, types.MakeDatums(5), kv.IntHandle(2), nil) + require.NoError(t, err) + err = txn.Commit(context.Background()) + require.NoError(t, err) + + for _, enabled := range []bool{false, true} { + tk.MustExec(fmt.Sprintf("set tidb_enable_fast_table_check = %v", enabled)) + err = tk.ExecToErr("admin check table t") + require.Error(t, err) + } +} +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) diff --git a/pkg/executor/test/writetest/write_test.go b/pkg/executor/test/writetest/write_test.go index b1c420e3352bc..6b82b525ea3ae 100644 --- a/pkg/executor/test/writetest/write_test.go +++ b/pkg/executor/test/writetest/write_test.go @@ -342,7 +342,8 @@ func TestReplaceLog(t *testing.T) { require.NoError(t, err) tblInfo := tbl.Meta() idxInfo := tblInfo.FindIndexByName("b") - indexOpr := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + indexOpr, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) txn, err := store.Begin() require.NoError(t, err) diff --git a/pkg/infoschema/perfschema/tables.go b/pkg/infoschema/perfschema/tables.go index bfb804f9bb673..fe78207b658d6 100644 --- a/pkg/infoschema/perfschema/tables.go +++ b/pkg/infoschema/perfschema/tables.go @@ -224,7 +224,10 @@ 
func initTableIndices(t *perfSchemaTable) error { if idxInfo.State == model.StateNone { return table.ErrIndexStateCantNone.GenWithStackByArgs(idxInfo.Name) } - idx := tables.NewIndex(t.meta.ID, tblInfo, idxInfo) + idx, err := tables.NewIndex(t.meta.ID, tblInfo, idxInfo) + if err != nil { + return err + } t.indices = append(t.indices, idx) } return nil diff --git a/pkg/meta/model/BUILD.bazel b/pkg/meta/model/BUILD.bazel index 65ebec22e19aa..84ca9cf0f2bc7 100644 --- a/pkg/meta/model/BUILD.bazel +++ b/pkg/meta/model/BUILD.bazel @@ -18,6 +18,11 @@ go_library( importpath = "github.com/pingcap/tidb/pkg/meta/model", visibility = ["//visibility:public"], deps = [ +<<<<<<< HEAD +======= + "//pkg/config/kerneltype", + "//pkg/parser", +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) "//pkg/parser/ast", "//pkg/parser/auth", "//pkg/parser/charset", diff --git a/pkg/meta/model/index.go b/pkg/meta/model/index.go index 130bfe4c9d64d..d39d980a73dce 100644 --- a/pkg/meta/model/index.go +++ b/pkg/meta/model/index.go @@ -18,6 +18,7 @@ import ( "fmt" "strings" + "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/parser/types" @@ -105,6 +106,7 @@ type VectorIndexInfo struct { // It corresponds to the statement `CREATE INDEX Name ON Table (Column);` // See https://dev.mysql.com/doc/refman/5.7/en/create-index.html type IndexInfo struct { +<<<<<<< HEAD ID int64 `json:"id"` Name model.CIStr `json:"idx_name"` // Index name. Table model.CIStr `json:"tbl_name"` // Table name. @@ -126,6 +128,47 @@ type IndexInfo struct { // 1=v1 with partition ID in key and value. // 2=v2 with partition ID in key only (TODO). GlobalIndexVersion uint8 `json:"global_index_version,omitempty"` +======= + ID int64 `json:"id"` + Name ast.CIStr `json:"idx_name"` // Index name. + Table ast.CIStr `json:"tbl_name"` // Table name. 
+ Columns []*IndexColumn `json:"idx_cols"` // Index columns. + State SchemaState `json:"state"` + BackfillState BackfillState `json:"backfill_state"` + Comment string `json:"comment"` // Comment + Tp ast.IndexType `json:"index_type"` // Index type: Btree, Hash, Rtree, Vector, Inverted, Fulltext + Unique bool `json:"is_unique"` // Whether the index is unique. + Primary bool `json:"is_primary"` // Whether the index is primary key. + Invisible bool `json:"is_invisible"` // Whether the index is invisible. + Global bool `json:"is_global"` // Whether the index is global. + MVIndex bool `json:"mv_index"` // Whether the index is multivalued index. + VectorInfo *VectorIndexInfo `json:"vector_index"` // VectorInfo is the vector index information. + InvertedInfo *InvertedIndexInfo `json:"inverted_index"` // InvertedInfo is the inverted index information. + FullTextInfo *FullTextIndexInfo `json:"full_text_index"` // FullTextInfo is the FULLTEXT index information. + ConditionExprString string `json:"condition_expr_string"` // ConditionExprString is the string representation of the partial index condition. + AffectColumn []*IndexColumn `json:"affect_column,omitempty"` // AffectColumn is the columns related to the index. +} + +// Hash64 implement HashEquals interface. +func (index *IndexInfo) Hash64(h base.Hasher) { + h.HashInt64(index.ID) +} + +// Equals implements HashEquals interface. +func (index *IndexInfo) Equals(other any) bool { + // any(nil) can still be converted as (*IndexInfo)(nil) + index2, ok := other.(*IndexInfo) + if !ok { + return false + } + if index == nil { + return index2 == nil + } + if index2 == nil { + return false + } + return index.ID == index2.ID +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) } // Clone clones IndexInfo. @@ -204,6 +247,21 @@ func (index *IndexInfo) IsTiFlashLocalIndex() bool { return index.VectorInfo != nil } +// HasCondition checks whether the index has a partial index condition. 
+func (index *IndexInfo) HasCondition() bool { + return len(index.ConditionExprString) > 0 +} + +// ConditionExpr parses and returns the condition expression of the partial index. +func (index *IndexInfo) ConditionExpr() (ast.ExprNode, error) { + stmtStr := "select " + index.ConditionExprString + stmts, _, err := parser.New().ParseSQL(stmtStr) + if err != nil { + return nil, err + } + return stmts[0].(*ast.SelectStmt).Fields.Fields[0].Expr, nil +} + // FindIndexByColumns find IndexInfo in indices which is cover the specified columns. func FindIndexByColumns(tbInfo *TableInfo, indices []*IndexInfo, cols ...model.CIStr) *IndexInfo { for _, index := range indices { diff --git a/pkg/meta/model/job_args.go b/pkg/meta/model/job_args.go index 3d90ccfb247fc..9748a529d8e76 100644 --- a/pkg/meta/model/job_args.go +++ b/pkg/meta/model/job_args.go @@ -1357,6 +1357,38 @@ type IndexArg struct { IndexID int64 `json:"index_id,omitempty"` IfExist bool `json:"if_exist,omitempty"` IsGlobal bool `json:"is_global,omitempty"` +<<<<<<< HEAD +======= + + // Only used for job args v2. + SplitOpt *IndexArgSplitOpt `json:"split_opt,omitempty"` + + // ConditionString is used to store the partial index condition string for the index. + ConditionString string `json:"condition_string,omitempty"` +} + +// GetColumnarIndexType gets the real columnar index type in a backward compatibility way. +func (a *IndexArg) GetColumnarIndexType() ColumnarIndexType { + // For compatibility, if columnar index type is not set, and it's a columnar index, it's a vector index. + + // If the columnar index type is NA and it's not a columnar index, it's a general index. + if a.ColumnarIndexType == ColumnarIndexTypeNA && !a.IsColumnar { + return ColumnarIndexTypeNA + } + // If the columnar index type is NA and it's a columnar index, it's a vector index. 
+ if a.ColumnarIndexType == ColumnarIndexTypeNA && a.IsColumnar { + return ColumnarIndexTypeVector + } + return a.ColumnarIndexType +} + +// IndexArgSplitOpt is a field of IndexArg used by index presplit. +type IndexArgSplitOpt struct { + Lower []string `json:"lower,omitempty"` + Upper []string `json:"upper,omitempty"` + Num int64 `json:"num,omitempty"` + ValueLists [][]string `json:"value_lists,omitempty"` +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) } // ModifyIndexArgs is the argument for add/drop/rename index jobs, diff --git a/pkg/planner/core/expression_codec_fn.go b/pkg/planner/core/expression_codec_fn.go index 02d7e0fa91495..9c661b2acdb4e 100644 --- a/pkg/planner/core/expression_codec_fn.go +++ b/pkg/planner/core/expression_codec_fn.go @@ -228,7 +228,10 @@ func (h tidbCodecFuncHelper) encodeIndexKeyFromRow( } tablecodec.TruncateIndexValues(tblInfo, idxInfo, idxDts) // Use physicalID instead of tblInfo.ID here to handle the partition case. - idx := tables.NewIndex(physicalID, tblInfo, idxInfo) + idx, err := tables.NewIndex(physicalID, tblInfo, idxInfo) + if err != nil { + return nil, false, err + } idxKey, _, err := idx.GenIndexKey(ctx.ErrCtx(), ctx.Location(), idxDts, handle, nil) return idxKey, false, err diff --git a/pkg/planner/core/logical_plan_builder.go b/pkg/planner/core/logical_plan_builder.go index ead149d7bdf80..f4a92995b4375 100644 --- a/pkg/planner/core/logical_plan_builder.go +++ b/pkg/planner/core/logical_plan_builder.go @@ -5462,10 +5462,23 @@ func pruneAndBuildColPositionInfoForDelete( cols2PosInfo := &cols2PosInfos[i] tbl := tblID2Table[cols2PosInfo.TblID] tblInfo := tbl.Meta() - // If it's partitioned table, or has foreign keys, or is point get plan, we can't prune the columns, currently. + // If it's partitioned table, or has foreign keys, or has partial index, or is point get plan, we can't prune the columns, currently. 
// nonPrunedSet will be nil if it's a point get or has foreign keys. +<<<<<<< HEAD if tblInfo.GetPartitionInfo() != nil || hasFK || nonPruned == nil { err = buildSingleTableColPosInfoForDelete(tbl, cols2PosInfo, prunedColCnt) +======= + skipPruning := tblInfo.GetPartitionInfo() != nil || hasFK || nonPruned == nil + for _, idx := range tblInfo.Indices { + if len(idx.ConditionExprString) > 0 { + // If the index has a partial index condition, we can't prune the columns. + skipPruning = true + break + } + } + if skipPruning { + err = buildSingleTableColPosInfoForDelete(tbl, cols2PosInfo) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) if err != nil { return nil, nil, err } diff --git a/pkg/server/tests/commontest/tidb_test.go b/pkg/server/tests/commontest/tidb_test.go index bcf80f3d6c89e..0e6b664d2d0c0 100644 --- a/pkg/server/tests/commontest/tidb_test.go +++ b/pkg/server/tests/commontest/tidb_test.go @@ -2284,10 +2284,17 @@ func TestTopSQLResourceTag(t *testing.T) { reqs []tikvrpc.CmdType }{ {"replace into mysql.global_variables (variable_name,variable_value) values ('tidb_enable_1pc', '1')", []tikvrpc.CmdType{tikvrpc.CmdPrewrite, tikvrpc.CmdCommit, tikvrpc.CmdBatchGet}}, +<<<<<<< HEAD {"select /*+ read_from_storage(tikv[`stmtstats`.`t`]) */ bit_xor(crc32(md5(concat_ws(0x2, `_tidb_rowid`, `a`)))), ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024), count(*) from `stmtstats`.`t` use index() where 0 = 0 group by ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024)", []tikvrpc.CmdType{tikvrpc.CmdCop}}, {"select bit_xor(crc32(md5(concat_ws(0x2, `_tidb_rowid`, `a`)))), ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024), count(*) from `stmtstats`.`t` use index(`idx`) where 0 = 0 group by ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024)", []tikvrpc.CmdType{tikvrpc.CmdCop}}, {"select /*+ 
read_from_storage(tikv[`stmtstats`.`t`]) */ bit_xor(crc32(md5(concat_ws(0x2, `_tidb_rowid`, `a`)))), ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024), count(*) from `stmtstats`.`t` use index() where 0 = 0 group by ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024)", []tikvrpc.CmdType{tikvrpc.CmdCop}}, {"select bit_xor(crc32(md5(concat_ws(0x2, `_tidb_rowid`, `a`)))), ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024), count(*) from `stmtstats`.`t` use index(`idx`) where 0 = 0 group by ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024)", []tikvrpc.CmdType{tikvrpc.CmdCop}}, +======= + {"select /*+ read_from_storage(tikv[`stmtstats`.`t`]), AGG_TO_COP() */ bit_xor(crc32(md5(concat_ws(0x2, `_tidb_rowid`, `a`)))), ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024), count(*) from `stmtstats`.`t` use index() where (0 = 0) group by ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024)", []tikvrpc.CmdType{tikvrpc.CmdCop}}, + {"select /*+ AGG_TO_COP() */ bit_xor(crc32(md5(concat_ws(0x2, `_tidb_rowid`, `a`)))), ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024), count(*) from `stmtstats`.`t` use index(`idx`) where (0 = 0) group by ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024)", []tikvrpc.CmdType{tikvrpc.CmdCop}}, + {"select /*+ read_from_storage(tikv[`stmtstats`.`t`]), AGG_TO_COP() */ bit_xor(crc32(md5(concat_ws(0x2, `_tidb_rowid`, `a`)))), ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024), count(*) from `stmtstats`.`t` use index() where (0 = 0) group by ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024)", []tikvrpc.CmdType{tikvrpc.CmdCop}}, + {"select /*+ AGG_TO_COP() */ bit_xor(crc32(md5(concat_ws(0x2, `_tidb_rowid`, `a`)))), ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 
% 1024), count(*) from `stmtstats`.`t` use index(`idx`) where (0 = 0) group by ((cast(crc32(md5(concat_ws(0x2, `_tidb_rowid`))) as signed) - 0) div 1 % 1024)", []tikvrpc.CmdType{tikvrpc.CmdCop}}, +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) } executeCaseFn := func(execFn func(db *sql.DB)) { dsn := ts.GetDSN(func(config *mysql.Config) { diff --git a/pkg/table/index.go b/pkg/table/index.go index e14c8725a69e8..81c20f572bc4a 100644 --- a/pkg/table/index.go +++ b/pkg/table/index.go @@ -87,11 +87,19 @@ type Index interface { Meta() *model.IndexInfo // TableMeta returns TableInfo TableMeta() *model.TableInfo + // MeetPartialCondition returns true if the row meets the partial index condition of the index. + MeetPartialCondition(row []types.Datum) (bool, error) // Create supports insert into statement. + // The `Create` inserts the index without considering the partial index condition. The caller should call `MeetPartialCondition` to check whether the + // row meets the partial index condition before calling `Create` to avoid unnecessary index creation. Create(ctx MutateContext, txn kv.Transaction, indexedValues []types.Datum, h kv.Handle, handleRestoreData []types.Datum, opts ...CreateIdxOption) (kv.Handle, error) // Delete supports delete from statement. + // The `Delete` deletes the index without considering the partial index condition. The caller should call `MeetPartialCondition` to check whether the + // row meets the partial index condition before calling `Delete` to avoid unnecessary index deletion. Delete(ctx MutateContext, txn kv.Transaction, indexedValues []types.Datum, h kv.Handle) error // GenIndexKVIter generate index key and value for multi-valued index, use iterator to reduce the memory allocation. + // `GenIndexKVIter` doesn't consider the partial index condition, the caller should call `MeetPartialCondition` to check. 
If the row doesn't meet + // the condition, it's suggested to use an empty kv generator instead. GenIndexKVIter(ec errctx.Context, loc *time.Location, indexedValue []types.Datum, h kv.Handle, handleRestoreData []types.Datum) IndexKVGenerator // Exist supports check index exists or not. Exist(ec errctx.Context, loc *time.Location, txn kv.Transaction, indexedValues []types.Datum, h kv.Handle) (bool, kv.Handle, error) diff --git a/pkg/table/tables/BUILD.bazel b/pkg/table/tables/BUILD.bazel index 2de16e0579c36..1c15f73118472 100644 --- a/pkg/table/tables/BUILD.bazel +++ b/pkg/table/tables/BUILD.bazel @@ -29,6 +29,11 @@ go_library( "//pkg/parser/mysql", "//pkg/parser/terror", "//pkg/sessionctx", +<<<<<<< HEAD +======= + "//pkg/sessionctx/stmtctx", + "//pkg/sessionctx/vardef", +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) "//pkg/sessionctx/variable", "//pkg/statistics", "//pkg/table", @@ -39,6 +44,7 @@ go_library( "//pkg/util/chunk", "//pkg/util/codec", "//pkg/util/collate", + "//pkg/util/context", "//pkg/util/dbterror", "//pkg/util/generatedexpr", "//pkg/util/hack", @@ -75,11 +81,17 @@ go_test( ], embed = [":tables"], flaky = True, +<<<<<<< HEAD shard_count = 35, +======= + shard_count = 42, +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) deps = [ "//pkg/ddl", "//pkg/domain", "//pkg/errctx", + "//pkg/errno", + "//pkg/expression/exprstatic", "//pkg/infoschema", "//pkg/kv", "//pkg/lightning/backend/encode", @@ -99,8 +111,10 @@ go_test( "//pkg/sessionctx/variable", "//pkg/sessiontxn", "//pkg/table", + "//pkg/table/tables/testutil", "//pkg/tablecodec", "//pkg/testkit", + "//pkg/testkit/testfailpoint", "//pkg/testkit/testsetup", "//pkg/types", "//pkg/util", diff --git a/pkg/table/tables/index.go b/pkg/table/tables/index.go index 9499fc8923a40..3fd70eda3b2c7 100644 --- a/pkg/table/tables/index.go +++ b/pkg/table/tables/index.go @@ -21,19 +21,37 @@ import ( 
"github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/errctx" + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/expression/exprctx" + "github.com/pingcap/tidb/pkg/expression/exprstatic" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/metrics" "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/sessionctx/stmtctx" "github.com/pingcap/tidb/pkg/table" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util" + "github.com/pingcap/tidb/pkg/util/chunk" + contextutil "github.com/pingcap/tidb/pkg/util/context" "github.com/pingcap/tidb/pkg/util/intest" + "github.com/pingcap/tidb/pkg/util/logutil" "github.com/pingcap/tidb/pkg/util/rowcodec" "github.com/pingcap/tidb/pkg/util/tracing" + "go.uber.org/zap" ) +var indexConditionECtx exprctx.BuildContext + +// indexPartialCondition is a data structure to help implement the partial index. +type indexPartialCondition struct { + conditionExpr expression.Expression + // conditionEvalBufferPool stores many eval buffer to avoid allocating chunk for evaluating partial index condition for each time. + // It's only initialized if the `partialConditionExpr` is not nil. + conditionEvalBufferPool sync.Pool +} + // index is the data structure for index data in the KV store. type index struct { idxInfo *model.IndexInfo @@ -44,6 +62,8 @@ type index struct { // the collation global variable is initialized *after* `NewIndex()`. initNeedRestoreData sync.Once needRestoredData bool + + indexPartialCondition } // NeedRestoredData checks whether the index columns needs restored data. @@ -57,13 +77,43 @@ func NeedRestoredData(idxCols []*model.IndexColumn, colInfos []*model.ColumnInfo } // NewIndex builds a new Index object. 
-func NewIndex(physicalID int64, tblInfo *model.TableInfo, indexInfo *model.IndexInfo) table.Index { +func NewIndex(physicalID int64, tblInfo *model.TableInfo, indexInfo *model.IndexInfo) (table.Index, error) { index := &index{ idxInfo: indexInfo, tblInfo: tblInfo, phyTblID: physicalID, } - return index + + conditionString := indexInfo.ConditionExprString + if len(conditionString) > 0 { + var err error + index.conditionExpr, err = expression.ParseSimpleExpr(indexConditionECtx, conditionString, expression.WithTableInfo("", tblInfo)) + if err != nil { + return nil, errors.Trace(err) + } + index.conditionEvalBufferPool = sync.Pool{ + New: func() any { + // For INSERT path, it'll only pass all writable columns. + // For UPDATE/DELETE path, it'll contain all columns. + // As the writable columns are always at the beginning of the `tblInfo.Columns`, it'll not affect + // the offsets of related columns in the expression. Therefore, it's fine to always record all + // columns here. + evalBufferTypes := make([]*types.FieldType, 0, len(tblInfo.Columns)+1) + for _, col := range tblInfo.Columns { + evalBufferTypes = append(evalBufferTypes, &col.FieldType) + } + + if !tblInfo.HasClusteredIndex() { + // If the table doesn't have clustered index, we need to append an extra handle column. + evalBufferTypes = append(evalBufferTypes, types.NewFieldType(mysql.TypeLonglong)) + } + + evalBuffer := chunk.MutRowFromTypes(evalBufferTypes) + return &evalBuffer + }, + } + } + return index, nil } // Meta returns index info. @@ -189,6 +239,34 @@ out: return vals } +// MeetPartialCondition checks whether the row meets the partial index condition of the index. 
+func (c *index) MeetPartialCondition(row []types.Datum) (meet bool, err error) { + defer func() { + r := recover() + if r != nil { + err = errors.Errorf("panic in MeetPartialCondition: %v", r) + intest.Assert(false, "should never panic in MeetPartialCondition") + logutil.BgLogger().Warn("panic in MeetPartialCondition", zap.Error(err), zap.Any("recover message", r)) + } + }() + + if c.conditionExpr == nil { + return true, nil + } + + evalBuffer := c.conditionEvalBufferPool.Get().(*chunk.MutRow) + defer c.conditionEvalBufferPool.Put(evalBuffer) + evalBuffer.SetDatums(row...) + + datum, isNull, err := c.conditionExpr.EvalInt(indexConditionECtx.GetEvalCtx(), evalBuffer.ToRow()) + if err != nil { + return false, err + } + // If the result is NULL, it usually means the original column itself is NULL. + // In this case, we should refuse to consider the index for partial index condition. + return datum > 0 && !isNull, nil +} + // Create creates a new entry in the kvIndex data. // If the index is unique and there is an existing entry with the same key, // Create will return the existing entry's handle as the first return value, ErrKeyExists as the second return value. @@ -844,3 +922,82 @@ func GenIndexValueFromIndex(key []byte, value []byte, tblInfo *model.TableInfo, return valueStr, nil } + +// ExtractColumnsFromCondition returns the columns that are referenced in the index condition expression. +// If `includeColumnsReferencedByVirtualGeneratedColumns` is true, it will recursively extract the columns from the virtual generated columns. +// The returned columns might be duplicated. 
+func ExtractColumnsFromCondition(ctx expression.BuildContext, idxInfo *model.IndexInfo, tblInfo *model.TableInfo, includeColumnsReferencedByVirtualGeneratedColumns bool) ([]*model.IndexColumn, error) { + if len(idxInfo.ConditionExprString) == 0 { + return nil, nil + } + + expr, err := expression.ParseSimpleExpr(ctx, idxInfo.ConditionExprString, expression.WithTableInfo("", tblInfo)) + if err != nil { + return nil, err + } + return extractColumnsFromExpr(expr, tblInfo, includeColumnsReferencedByVirtualGeneratedColumns) +} + +// DedupIndexColumns deduplicates the index columns based on their Offset. +func DedupIndexColumns(cols []*model.IndexColumn) []*model.IndexColumn { + if len(cols) <= 1 { + return cols + } + + seen := make(map[int]struct{}, len(cols)) + result := make([]*model.IndexColumn, 0, len(cols)) + for _, col := range cols { + if _, found := seen[col.Offset]; !found { + seen[col.Offset] = struct{}{} + result = append(result, col) + } + } + return result +} + +// extractColumnsFromExpr extracts the columns from the given expression. +// If `includeVirtualGeneratedColumn` is true, it will recursively extract the columns from the virtual generated columns. +// The returned columns might be duplicated. +func extractColumnsFromExpr(expr expression.Expression, tblInfo *model.TableInfo, includeVirtualGeneratedColumn bool) ([]*model.IndexColumn, error) { + var neededCols []*model.IndexColumn + cols := expression.ExtractColumns(expr) + for _, col := range cols { + if tblInfo.Columns[col.Index].IsVirtualGenerated() { + if includeVirtualGeneratedColumn { + depCols, err := extractColumnsFromExpr(col.VirtualExpr, tblInfo, includeVirtualGeneratedColumn) + if err != nil { + return nil, err + } + + neededCols = append(neededCols, depCols...) 
+ } + + neededCols = append(neededCols, &model.IndexColumn{ + Name: tblInfo.Columns[col.Index].Name, + Offset: col.Index, + }) + } else { + neededCols = append(neededCols, &model.IndexColumn{ + Name: tblInfo.Columns[col.Index].Name, + Offset: col.Index, + }) + } + } + + return neededCols, nil +} + +func init() { + evalCtx := exprstatic.NewEvalContext( + exprstatic.WithSQLMode(mysql.ModeNone), + exprstatic.WithTypeFlags(types.DefaultStmtFlags), + exprstatic.WithErrLevelMap(stmtctx.DefaultStmtErrLevels), + ) + + planCacheTracker := contextutil.NewPlanCacheTracker(contextutil.IgnoreWarn) + + indexConditionECtx = exprstatic.NewExprContext( + exprstatic.WithEvalCtx(evalCtx), + exprstatic.WithPlanCacheTracker(&planCacheTracker), + ) +} diff --git a/pkg/table/tables/index_test.go b/pkg/table/tables/index_test.go index 1c0b0a84ceb9f..4bcdb62445f1d 100644 --- a/pkg/table/tables/index_test.go +++ b/pkg/table/tables/index_test.go @@ -17,11 +17,14 @@ package tables_test import ( "context" "fmt" + "math/rand/v2" "strings" "testing" "time" "github.com/pingcap/tidb/pkg/ddl" + "github.com/pingcap/tidb/pkg/errno" + "github.com/pingcap/tidb/pkg/expression/exprstatic" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/lightning/backend/encode" lkv "github.com/pingcap/tidb/pkg/lightning/backend/kv" @@ -33,8 +36,10 @@ import ( "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/table" "github.com/pingcap/tidb/pkg/table/tables" + "github.com/pingcap/tidb/pkg/table/tables/testutil" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/testkit" + "github.com/pingcap/tidb/pkg/testkit/testfailpoint" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/mock" @@ -46,7 +51,8 @@ func TestMultiColumnCommonHandle(t *testing.T) { tblInfo := buildTableInfo(t, "create table t (a int, b int, u varchar(64) unique, nu varchar(64), primary key (a, b), index nu (nu))") var idxUnique, idxNonUnique 
table.Index for _, idxInfo := range tblInfo.Indices { - idx := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + idx, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) if idxInfo.Name.L == "u" { idxUnique = idx } else if idxInfo.Name.L == "nu" { @@ -118,7 +124,8 @@ func TestSingleColumnCommonHandle(t *testing.T) { tblInfo := buildTableInfo(t, "create table t (a varchar(255) primary key, u int unique, nu int, index nu (nu))") var idxUnique, idxNonUnique table.Index for _, idxInfo := range tblInfo.Indices { - idx := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + idx, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) if idxInfo.Name.L == "u" { idxUnique = idx } else if idxInfo.Name.L == "nu" { @@ -221,9 +228,11 @@ func TestGenIndexValueWithLargePaddingSize(t *testing.T) { // ref https://github.com/pingcap/tidb/issues/47115 tblInfo := buildTableInfo(t, "create table t (a int, b int, k varchar(255), primary key (a, b), key (k))") var idx table.Index + var err error for _, idxInfo := range tblInfo.Indices { if !idxInfo.Primary { - idx = tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + idx, err = tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) break } } @@ -382,7 +391,8 @@ func TestForceLockNonUniqueIndexInDDLMergingTempIndex(t *testing.T) { store := testkit.CreateMockStore(t) h := kv.IntHandle(1) indexedValues := []types.Datum{types.NewIntDatum(100)} - idx := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + idx, err := tables.NewIndex(tblInfo.ID, tblInfo, idxInfo) + require.NoError(t, err) indexKey, distinct, err := idx.GenIndexKey(mockCtx.ErrCtx(), time.UTC, indexedValues, h, nil) require.NoError(t, err) require.False(t, distinct) @@ -422,3 +432,302 @@ func TestForceLockNonUniqueIndexInDDLMergingTempIndex(t *testing.T) { }) } } + +func TestMeetPartialCondition(t *testing.T) { + // The index name for the index must be `testidx` + type testCase struct { + tableDefinition string + row []any + 
meet bool + } + testCases := []testCase{ + // cluster index case + { + tableDefinition: "create table t (a int, b int, c int, primary key (a, b), key testidx(c) where c > 2)", + row: []any{1, 2, 3}, + meet: true, + }, + { + tableDefinition: "create table t (a int, b int, c int, primary key (a, b), key testidx(c) where c > 3)", + row: []any{1, 2, 3}, + meet: false, + }, + // primary as handle case + { + tableDefinition: "create table t (a int, b int, c int, primary key (a), key testidx(c) where c > 2)", + row: []any{1, 2, 3}, + meet: true, + }, + { + tableDefinition: "create table t (a int, b int, c int, primary key (a), key testidx(c) where c > 3)", + row: []any{1, 2, 3}, + meet: false, + }, + // tidb rowid case + { + tableDefinition: "create table t (a int, b int, c int, key testidx(c) where c > 2)", + row: []any{1, 2, 3, 100}, + meet: true, + }, + { + tableDefinition: "create table t (a int, b int, c int, key testidx(c) where c > 3)", + row: []any{1, 2, 3, 500}, + meet: false, + }, + } + + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + for _, tc := range testCases { + tk.MustExec(tc.tableDefinition) + + tbl, err := dom.InfoSchema().TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t")) + require.NoError(t, err) + require.NotNil(t, t) + var idx table.Index + for _, i := range tbl.Indices() { + if i.Meta().Name.L == "testidx" { + idx = i + break + } + } + + rowData := types.MakeDatums(tc.row...) 
+ + meet, err := idx.MeetPartialCondition(rowData) + require.NoError(t, err) + require.Equal(t, tc.meet, meet) + + tk.MustExec("drop table t") + } +} + +func TestPartialIndexDML(t *testing.T) { + // The index name for `indexDefinition` must be `testidx` + type testCase struct { + tableDefinition string + dml []string + shouldCreateIndex bool + } + testCases := []testCase{ + // cluster index case + { + tableDefinition: "create table t (a int, b int, c int, primary key (a, b), key testidx(c) where c > 2)", + dml: []string{"insert into t values (1, 2, 3)"}, + shouldCreateIndex: true, + }, + { + tableDefinition: "create table t (a int, b int, c int, primary key (a, b), key testidx(c) where c > 3)", + dml: []string{"insert into t values (1, 2, 3)"}, + shouldCreateIndex: false, + }, + // primary as handle case + { + tableDefinition: "create table t (a int, b int, c int, primary key (a), key testidx(c) where c > 2)", + dml: []string{"insert into t values (1, 2, 3)"}, + shouldCreateIndex: true, + }, + { + tableDefinition: "create table t (a int, b int, c int, primary key (a), key testidx(c) where c > 3)", + dml: []string{"insert into t values (1, 2, 3)"}, + shouldCreateIndex: false, + }, + // tidb rowid case + { + tableDefinition: "create table t (a int, b int, c int, key testidx(c) where c > 2)", + dml: []string{"insert into t values (1, 2, 3)"}, + shouldCreateIndex: true, + }, + { + tableDefinition: "create table t (a int, b int, c int, key testidx(c) where c > 3)", + dml: []string{"insert into t values (1, 2, 3)"}, + shouldCreateIndex: false, + }, + // update case + { + tableDefinition: "create table t (a int, b int, c int, primary key (a), key testidx(c) where c > 2)", + dml: []string{"insert into t values (1, 2, 3)", "update t set c = 4 where a = 1"}, + shouldCreateIndex: true, + }, + { + tableDefinition: "create table t (a int, b int, c int, primary key (a), key testidx(c) where c > 2)", + dml: []string{"insert into t values (1, 2, 3)", "update t set c = 1 where a = 
1"}, + shouldCreateIndex: false, + }, + { + tableDefinition: "create table t (a int, b int, c int, primary key (a), key testidx(c) where c > 2)", + dml: []string{"insert into t values (1, 2, 1)", "update t set c = 3 where a = 1"}, + shouldCreateIndex: true, + }, + } + + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + for _, tc := range testCases { + tk.MustExec(tc.tableDefinition) + + for _, dml := range tc.dml { + tk.MustExec(dml) + } + if tc.shouldCreateIndex { + testutil.CheckIndexKVCount(t, tk, dom, "t", "testidx", 1) + } else { + testutil.CheckIndexKVCount(t, tk, dom, "t", "testidx", 0) + } + + if tc.shouldCreateIndex { + // test delete + tk.MustExec("delete from t") + testutil.CheckIndexKVCount(t, tk, dom, "t", "testidx", 0) + } + + tk.MustExec("drop table t") + } +} + +func TestExtractColumnsFromCondition(t *testing.T) { + // Mock the necessary context and inputs + ctx := exprstatic.NewExprContext() + tblInfo := &model.TableInfo{ + Name: ast.NewCIStr("test_table"), + Columns: []*model.ColumnInfo{ + {Name: ast.NewCIStr("c1"), Offset: 0, State: model.StatePublic}, + {Name: ast.NewCIStr("c2"), Offset: 1, State: model.StatePublic}, + {Name: ast.NewCIStr("c3"), Offset: 2, State: model.StatePublic, GeneratedExprString: "c1 + c2", GeneratedStored: false}, + {Name: ast.NewCIStr("c4"), Offset: 3, State: model.StatePublic, GeneratedExprString: "c1 + c2", GeneratedStored: true}, + }, + } + idxInfo := &model.IndexInfo{ + Columns: []*model.IndexColumn{ + {Name: ast.NewCIStr("c1"), Offset: 0}, + }, + } + + tests := []struct { + cond string + expected []*model.IndexColumn + expectedColumnInWithVirtualGeneratedColumn []*model.IndexColumn + }{ + { + cond: "c1 AND c2", + expected: []*model.IndexColumn{{Name: ast.NewCIStr("c1"), Offset: 0}, {Name: ast.NewCIStr("c2"), Offset: 1}}, + expectedColumnInWithVirtualGeneratedColumn: []*model.IndexColumn{{Name: ast.NewCIStr("c1"), Offset: 0}, {Name: 
ast.NewCIStr("c2"), Offset: 1}}, + }, + { + cond: "c1 > 100", + expected: []*model.IndexColumn{{Name: ast.NewCIStr("c1"), Offset: 0}}, + expectedColumnInWithVirtualGeneratedColumn: []*model.IndexColumn{{Name: ast.NewCIStr("c1"), Offset: 0}}, + }, + { + cond: "c3 > 50", + expected: []*model.IndexColumn{{Name: ast.NewCIStr("c3"), Offset: 2}}, + expectedColumnInWithVirtualGeneratedColumn: []*model.IndexColumn{{Name: ast.NewCIStr("c1"), Offset: 0}, {Name: ast.NewCIStr("c2"), Offset: 1}, {Name: ast.NewCIStr("c3"), Offset: 2}}, + }, + { + cond: "c4 > 50", + expected: []*model.IndexColumn{{Name: ast.NewCIStr("c4"), Offset: 3}}, + expectedColumnInWithVirtualGeneratedColumn: []*model.IndexColumn{{Name: ast.NewCIStr("c4"), Offset: 3}}, + }, + } + + for _, tt := range tests { + t.Run(tt.cond, func(t *testing.T) { + idxInfo.ConditionExprString = tt.cond + + got, err := tables.ExtractColumnsFromCondition(ctx, idxInfo, tblInfo, false) + require.NoError(t, err) + require.ElementsMatch(t, tt.expected, got) + + got, err = tables.ExtractColumnsFromCondition(ctx, idxInfo, tblInfo, true) + require.NoError(t, err) + require.ElementsMatch(t, tt.expectedColumnInWithVirtualGeneratedColumn, got) + }) + } +} + +func TestDedupIndexColumns4Test(t *testing.T) { + colCount := 100 + allCols := make([]*model.IndexColumn, 0, colCount) + for i := range colCount { + allCols = append(allCols, &model.IndexColumn{ + Name: ast.NewCIStr(fmt.Sprintf("c%d", i)), + Offset: i, + }) + } + + // add many existing columns and some duplicated columns + cols := make([]*model.IndexColumn, 0, colCount*2) + for i := range colCount { + cols = append(cols, allCols[i]) + } + for range colCount { + cols = append(cols, allCols[rand.IntN(colCount)]) + } + + result := tables.DedupIndexColumns(cols) + require.Equal(t, allCols, result) +} + +func TestPartialIndexDMLDuringDDL(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + type testCase struct { + ddl string + } + 
testCases := []testCase{ + { + ddl: "alter table t modify column b int first", + }, + { + ddl: "alter table t modify column b int unsigned first", + }, + { + ddl: "alter table t modify column b int unsigned", + }, + { + ddl: "alter table t change column b e int unsigned", + }, + { + ddl: "alter table t change column b e int unsigned, change column d f int unsigned", + }, + } + for _, tc := range testCases { + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int, c int, d int, key testidx(a) where c > 4)") + + testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/ddl/afterWaitSchemaSynced", func(job *model.Job) { + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("insert into t(a, c) values (1, 2), (2, 3), (3, 4), (4, 5)") + testutil.CheckIndexKVCount(t, tk, dom, "t", "testidx", 1) + // the hint here is a workaround before we have a valid planner for partial index + tk.MustExec("update /*+ ignore_index(t, testidx) */ t set c = 5 where a = 1") + testutil.CheckIndexKVCount(t, tk, dom, "t", "testidx", 2) + tk.MustExec("delete from t where a = 1") + testutil.CheckIndexKVCount(t, tk, dom, "t", "testidx", 1) + tk.MustExec("delete from t") + testutil.CheckIndexKVCount(t, tk, dom, "t", "testidx", 0) + }) + tk.MustExec(tc.ddl) + testfailpoint.Disable(t, "github.com/pingcap/tidb/pkg/ddl/afterWaitSchemaSynced") + + tk.MustExec("drop table t") + } +} + +func TestPartialIndexDMLUniqueness(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int, c int, unique key testidx(a) where c >= 2)") + tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)") + testutil.CheckIndexKVCount(t, tk, dom, "t", "testidx", 2) + tk.MustGetErrCode("insert into t values (2, 4, 4)", errno.ErrDupEntry) + tk.MustExec("insert into t values (2, 4, 1)") +} diff --git a/pkg/table/tables/tables.go b/pkg/table/tables/tables.go index 
26c6e89af46be..e1ee39f9978e9 100644 --- a/pkg/table/tables/tables.go +++ b/pkg/table/tables/tables.go @@ -264,7 +264,10 @@ func initTableIndices(t *TableCommon) error { } // Use partition ID for index, because TableCommon may be table or partition. - idx := NewIndex(t.physicalTableID, tblInfo, idxInfo) + idx, err := NewIndex(t.physicalTableID, tblInfo, idxInfo) + if err != nil { + return err + } intest.AssertFunc(func() bool { // `TableCommon.indices` is type of `[]table.Index` to implement interface method `Table.Indices`. // However, we have an assumption that the specific type of each element in it should always be `*index`. @@ -571,6 +574,16 @@ func (t *TableCommon) rebuildUpdateRecordIndices( if idx.Meta().IsTiFlashLocalIndex() { continue } + + oldDataMeetPartialCondition, err := idx.MeetPartialCondition(oldData) + if err != nil { + return err + } + if !oldDataMeetPartialCondition { + // If the partial index condition is not met, we don't need to delete it because + // it has never been written. + continue + } for _, ic := range idx.Meta().Columns { if !touched[ic.Offset] { continue @@ -604,9 +617,25 @@ func (t *TableCommon) rebuildUpdateRecordIndices( untouched = false break } + for _, ic := range idx.Meta().AffectColumn { + if !touched[ic.Offset] { + continue + } + untouched = false + break + } if untouched && opt.SkipWriteUntouchedIndices() { continue } + newDataMeetPartialCondition, err := idx.MeetPartialCondition(newData) + if err != nil { + return err + } + if !newDataMeetPartialCondition { + // If the partial index condition is not met, we don't need to build the new index. + continue + } + newVs, err := idx.FetchValues(newData, nil) if err != nil { return err @@ -965,10 +994,18 @@ func (t *TableCommon) addIndices(sctx table.MutateContext, recordID kv.Handle, r if t.meta.IsCommonHandle && v.Meta().Primary { continue } - // We declared `err` here to make sure `indexVals` is assigned with `=` instead of `:=`. 
+ + meetPartialCondition, err := v.MeetPartialCondition(r) + if err != nil { + return nil, err + } + if !meetPartialCondition { + continue + } + + // We should make sure `indexVals` is assigned with `=` instead of `:=`. // The latter one will create a new variable that shadows the outside `indexVals` that makes `indexVals` outside // always nil, and we cannot reuse it. - var err error indexVals, err = v.FetchValues(r, indexVals) if err != nil { return nil, err @@ -1225,6 +1262,20 @@ func (t *TableCommon) removeRowIndices(ctx table.MutateContext, txn kv.Transacti if v.Meta().IsTiFlashLocalIndex() { continue } + intest.AssertFunc(func() bool { + // if the index is partial index, it shouldn't have index layout. + return !(opt.HasIndexesLayout() && v.Meta().HasCondition()) + }) + meetPartialCondition, err := v.MeetPartialCondition(rec) + if err != nil { + return err + } + if !meetPartialCondition { + // If the partial index condition is not met, we don't need to delete it because + // it has never been written. + continue + } + var vals []types.Datum if opt.HasIndexesLayout() { vals, err = fetchIndexRow(v.Meta(), rec, nil, opt.GetIndexLayout(v.Meta().ID)) @@ -1248,7 +1299,6 @@ func (t *TableCommon) removeRowIndices(ctx table.MutateContext, txn kv.Transacti return nil } -// buildIndexForRow implements table.Table BuildIndexForRow interface. 
func (t *TableCommon) buildIndexForRow(ctx table.MutateContext, h kv.Handle, vals []types.Datum, newData []types.Datum, idx *index, txn kv.Transaction, untouched bool, opt *table.CreateIdxOpt) error { rsData := TryGetHandleRestoredDataWrapper(t.meta, newData, nil, idx.Meta()) if _, err := idx.create(ctx, txn, vals, h, rsData, untouched, opt); err != nil { diff --git a/pkg/table/tables/testutil/BUILD.bazel b/pkg/table/tables/testutil/BUILD.bazel new file mode 100644 index 0000000000000..2e9c7b51fab68 --- /dev/null +++ b/pkg/table/tables/testutil/BUILD.bazel @@ -0,0 +1,17 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "testutil", + srcs = ["indexcheck.go"], + importpath = "github.com/pingcap/tidb/pkg/table/tables/testutil", + visibility = ["//visibility:public"], + deps = [ + "//pkg/domain", + "//pkg/parser/ast", + "//pkg/sessiontxn", + "//pkg/table", + "//pkg/tablecodec", + "//pkg/testkit", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/table/tables/testutil/indexcheck.go b/pkg/table/tables/testutil/indexcheck.go new file mode 100644 index 0000000000000..7c6d3075efdbf --- /dev/null +++ b/pkg/table/tables/testutil/indexcheck.go @@ -0,0 +1,72 @@ +// Copyright 2025 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package testutil + +import ( + "context" + "testing" + "time" + + "github.com/pingcap/tidb/pkg/domain" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/sessiontxn" + "github.com/pingcap/tidb/pkg/table" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/pingcap/tidb/pkg/testkit" + "github.com/stretchr/testify/require" +) + +// CheckIndexKVCount checks the number of index key-value pairs in the specified index of the specified table. +func CheckIndexKVCount(t *testing.T, tk *testkit.TestKit, dom *domain.Domain, tableName string, indexName string, expected int) { + tbl, err := dom.InfoSchema().TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr(tableName)) + require.NoError(t, err) + require.NotNil(t, tbl) + var idx table.Index + for _, i := range tbl.Indices() { + if i.Meta().Name.L == indexName { + idx = i + break + } + } + + minimumKey, _, err := tablecodec.GenIndexKey(time.Local, tbl.Meta(), idx.Meta(), tbl.Meta().ID, nil, nil, nil) + require.NoError(t, err) + + tk.MustExec("BEGIN") + defer tk.MustExec("COMMIT") + txnManager := sessiontxn.GetTxnManager(tk.Session()) + snapshot, err := txnManager.GetSnapshotWithStmtReadTS() + require.NoError(t, err) + + iter, err := snapshot.Iter(minimumKey, nil) + require.NoError(t, err) + defer iter.Close() + count := 0 + for iter.Valid() { + key := iter.Key() + if !tablecodec.IsIndexKey(key) { + break + } + _, idxID, _, err := tablecodec.DecodeIndexKey(key) + require.NoError(t, err) + if idxID != idx.Meta().ID { + break + } + count++ + err = iter.Next() + require.NoError(t, err) + } + require.Equal(t, expected, count) +} diff --git a/pkg/util/dbterror/ddl_terror.go b/pkg/util/dbterror/ddl_terror.go index b0cfc183bd424..446991434e37a 100644 --- a/pkg/util/dbterror/ddl_terror.go +++ b/pkg/util/dbterror/ddl_terror.go @@ -80,6 +80,13 @@ var ( ErrUnsupportedAlterTableOption = ClassDDL.NewStdErr(mysql.ErrUnsupportedDDLOperation, parser_mysql.Message("This type of ALTER TABLE is 
currently unsupported", nil)) // ErrUnsupportedAlterCacheForSysTable means we don't support the alter cache for system table. ErrUnsupportedAlterCacheForSysTable = ClassDDL.NewStdErr(mysql.ErrUnsupportedDDLOperation, parser_mysql.Message("ALTER table cache for tables in system database is currently unsupported", nil)) +<<<<<<< HEAD +======= + // ErrUnsupportedAddPartialIndex the partial index condition is not supported + ErrUnsupportedAddPartialIndex = ClassDDL.NewStdErr(mysql.ErrUnsupportedDDLOperation, parser_mysql.Message(fmt.Sprintf(mysql.MySQLErrName[mysql.ErrUnsupportedDDLOperation].Raw, "add partial index: %s"), nil)) + // ErrModifyColumnReferencedByPartialCondition is used when a column is referenced by a partial index condition. + ErrModifyColumnReferencedByPartialCondition = ClassDDL.NewStd(mysql.ErrModifyColumnReferencedByPartialCondition) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) // ErrBlobKeyWithoutLength is used when BLOB is used as key but without a length. ErrBlobKeyWithoutLength = ClassDDL.NewStd(mysql.ErrBlobKeyWithoutLength) // ErrKeyPart0 is used when key parts length is 0. 
diff --git a/tests/integrationtest/r/ddl/integration.result b/tests/integrationtest/r/ddl/integration.result index e8985f2e023d6..c7a10eaa5f56c 100644 --- a/tests/integrationtest/r/ddl/integration.result +++ b/tests/integrationtest/r/ddl/integration.result @@ -147,3 +147,31 @@ PARTITION `p1970` VALUES LESS THAN (1980), PARTITION `p1980` VALUES LESS THAN (1990), PARTITION `p1990` VALUES LESS THAN (2000)); ALTER TABLE members REORGANIZE PARTITION `p1990` INTO (PARTITION p1995 VALUES LESS THAN (1995), PARTITION p2000 VALUES LESS THAN (2010)); +create table t (a int, b int, key testidx(b) where a > 2); +insert into t values (1, 2); +alter table t modify column a bigint; +Error 8272 (HY000): Cannot drop, change or modify column 'a': it is referenced in partial index 'testidx' +alter table t drop column a; +Error 8272 (HY000): Cannot drop, change or modify column 'a': it is referenced in partial index 'testidx' +alter table t change column a c int; +Error 8272 (HY000): Cannot drop, change or modify column 'a': it is referenced in partial index 'testidx' +alter table t drop index testidx; +alter table t modify column b bigint; +alter table t drop column b; +drop table t; +drop table if exists t; +create table t (a int, b int, key testidx(b) where _tidb_rowid > 2); +Error 8200 (HY000): Unsupported add partial index: column name `_tidb_rowid` referenced in partial index condition is not found in table +create table t (a int, b int); +alter table t add index testidx(b) where _tidb_rowid > 2; +Error 8200 (HY000): Unsupported add partial index: column name `_tidb_rowid` referenced in partial index condition is not found in table +drop table if exists t; +create table t (a int, b int, key testidx(b) where a > 5) partition by range (b) (partition p0 values less than (5)); +Error 8200 (HY000): Unsupported add partial index: partial index is not supported on partitioned table +create table t (a int, b int) partition by range (b) (partition p0 values less than (5)); +create index 
testidx on t(b) where a > 5; +Error 8200 (HY000): Unsupported add partial index: partial index on partitioned table is not supported +drop table t; +create table t (a int, b int, key testidx(b) where a > 5); +alter table t partition by range (b) (partition p0 values less than (5)); +Error 8200 (HY000): Unsupported add partial index: partial index is not supported on partitioned table diff --git a/tests/integrationtest/r/executor/admin.result b/tests/integrationtest/r/executor/admin.result index 7cd0daef3a967..86cccb6ba468d 100644 --- a/tests/integrationtest/r/executor/admin.result +++ b/tests/integrationtest/r/executor/admin.result @@ -218,3 +218,16 @@ insert into admin_test with recursive cte(a, b) as (select 1, 1 union select a+1 select /*+ read_from_storage(tikv[`executor__admin`.`admin_test`]) */ bit_xor(crc32(md5(concat_ws(0x2, `c1`, `c2`)))), ((cast(crc32(md5(concat_ws(0x2, `c1`))) as signed) - 9223372036854775807) div 1 % 1024), count(*) from `executor__admin`.`admin_test` use index() where 0 = 0 group by ((cast(crc32(md5(concat_ws(0x2, `c1`))) as signed) - 9223372036854775807) div 1 % 1024); select bit_xor(crc32(md5(concat_ws(0x2, `c1`, `c2`)))), ((cast(crc32(md5(concat_ws(0x2, `c1`))) as signed) - 9223372036854775807) div 1 % 1024), count(*) from `executor__admin`.`admin_test` use index(`c2`) where 0 = 0 group by ((cast(crc32(md5(concat_ws(0x2, `c1`))) as signed) - 9223372036854775807) div 1 % 1024); set cte_max_recursion_depth=default; +drop table if exists t; +create table t (col1 int, key idx(col1) where col1 > 0); +insert into t values (NULL); +insert into t values (-1); +insert into t values (1); +set tidb_enable_fast_table_check = 'OFF'; +admin check index t idx; +Error 8273 (HY000): Validation of partial indexes requires tidb_enable_fast_table_check=ON +admin check table t; +Error 8273 (HY000): Validation of partial indexes requires tidb_enable_fast_table_check=ON +set tidb_enable_fast_table_check = 'ON'; +admin check index t idx; +admin check table 
t; diff --git a/tests/integrationtest/t/ddl/integration.test b/tests/integrationtest/t/ddl/integration.test index 4c8e26c3b64cc..8f9f2db08e216 100644 --- a/tests/integrationtest/t/ddl/integration.test +++ b/tests/integrationtest/t/ddl/integration.test @@ -134,3 +134,37 @@ PARTITION BY RANGE (YEAR(`dob`)) PARTITION `p1980` VALUES LESS THAN (1990), PARTITION `p1990` VALUES LESS THAN (2000)); ALTER TABLE members REORGANIZE PARTITION `p1990` INTO (PARTITION p1995 VALUES LESS THAN (1995), PARTITION p2000 VALUES LESS THAN (2010)); + +# TestAvoidDropModifyChangeColumnReferencedByPartialCondition +create table t (a int, b int, key testidx(b) where a > 2); +insert into t values (1, 2); +--error 8272 +alter table t modify column a bigint; +--error 8272 +alter table t drop column a; +--error 8272 +alter table t change column a c int; +alter table t drop index testidx; +alter table t modify column b bigint; +alter table t drop column b; +drop table t; + +# TestRowIDIsNotAllowedForPartialIndex +drop table if exists t; +--error 8200 +create table t (a int, b int, key testidx(b) where _tidb_rowid > 2); +create table t (a int, b int); +--error 8200 +alter table t add index testidx(b) where _tidb_rowid > 2; + +# TestPartitionTableNotSupportedForPartialIndex +drop table if exists t; +--error 8200 +create table t (a int, b int, key testidx(b) where a > 5) partition by range (b) (partition p0 values less than (5)); +create table t (a int, b int) partition by range (b) (partition p0 values less than (5)); +--error 8200 +create index testidx on t(b) where a > 5; +drop table t; +create table t (a int, b int, key testidx(b) where a > 5); +--error 8200 +alter table t partition by range (b) (partition p0 values less than (5)); \ No newline at end of file diff --git a/tests/integrationtest/t/executor/admin.test b/tests/integrationtest/t/executor/admin.test index fcb9eeb5b5065..de1d01f555054 100644 --- a/tests/integrationtest/t/executor/admin.test +++ 
b/tests/integrationtest/t/executor/admin.test @@ -226,3 +226,18 @@ select /*+ read_from_storage(tikv[`executor__admin`.`admin_test`]) */ bit_xor(cr select bit_xor(crc32(md5(concat_ws(0x2, `c1`, `c2`)))), ((cast(crc32(md5(concat_ws(0x2, `c1`))) as signed) - 9223372036854775807) div 1 % 1024), count(*) from `executor__admin`.`admin_test` use index(`c2`) where 0 = 0 group by ((cast(crc32(md5(concat_ws(0x2, `c1`))) as signed) - 9223372036854775807) div 1 % 1024); set cte_max_recursion_depth=default; --enable_result_log + +# TestAdminCheckPartialIndex +drop table if exists t; +create table t (col1 int, key idx(col1) where col1 > 0); +insert into t values (NULL); +insert into t values (-1); +insert into t values (1); +set tidb_enable_fast_table_check = 'OFF'; +--error 8273 +admin check index t idx; +--error 8273 +admin check table t; +set tidb_enable_fast_table_check = 'ON'; +admin check index t idx; +admin check table t; diff --git a/tests/realtikvtest/addindextest3/operator_test.go b/tests/realtikvtest/addindextest3/operator_test.go index af5973c38f507..d1b6badae5d31 100644 --- a/tests/realtikvtest/addindextest3/operator_test.go +++ b/tests/realtikvtest/addindextest3/operator_test.go @@ -115,7 +115,11 @@ func TestBackfillOperators(t *testing.T) { ctx := context.Background() wctx := workerpool.NewContext(ctx) src := testutil.NewOperatorTestSource(opTasks...) 
+<<<<<<< HEAD scanOp := ddl.NewTableScanOperator(wctx, sessPool, copCtx, srcChkPool, 3, 0, nil, nil) +======= + scanOp := ddl.NewTableScanOperator(opCtx, sessPool, copCtx, srcChkPool, 3, 0, &model.DDLReorgMeta{}, nil, &execute.TestCollector{}) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) sink := testutil.NewOperatorTestSink[ddl.IndexRecordChunk]() operator.Compose[ddl.TableScanTask](src, scanOp) @@ -159,7 +163,8 @@ func TestBackfillOperators(t *testing.T) { }, } pTbl := tbl.(table.PhysicalTable) - index := tables.NewIndex(pTbl.GetPhysicalID(), tbl.Meta(), idxInfo) + index, err := tables.NewIndex(pTbl.GetPhysicalID(), tbl.Meta(), idxInfo) + require.NoError(t, err) cfg, bd, err := ingest.CreateLocalBackend(context.Background(), store, realJob, false, false, 0) require.NoError(t, err) defer bd.Close() @@ -375,7 +380,7 @@ func prepare(t *testing.T, tk *testkit.TestKit, dom *domain.Domain, regionCnt in tblInfo := tbl.Meta() idxInfo = tblInfo.FindIndexByName("idx") sctx := tk.Session() - copCtx, err = ddl.NewReorgCopContext(dom.Store(), ddl.NewDDLReorgMeta(sctx), tblInfo, []*model.IndexInfo{idxInfo}, "") + copCtx, err = ddl.NewReorgCopContext(ddl.NewDDLReorgMeta(sctx), tblInfo, []*model.IndexInfo{idxInfo}, "") require.NoError(t, err) require.IsType(t, copCtx, &copr.CopContextSingleIndex{}) return tbl, idxInfo, start, end, copCtx @@ -416,8 +421,13 @@ func TestTuneWorkerPoolSize(t *testing.T) { // Test TableScanOperator. 
{ ctx := context.Background() +<<<<<<< HEAD wctx := workerpool.NewContext(ctx) scanOp := ddl.NewTableScanOperator(wctx, sessPool, copCtx, nil, 2, 0, nil, nil) +======= + opCtx, cancel := ddl.NewDistTaskOperatorCtx(ctx) + scanOp := ddl.NewTableScanOperator(opCtx, sessPool, copCtx, nil, 2, 0, &model.DDLReorgMeta{}, nil, &execute.TestCollector{}) +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) scanOp.Open() require.Equal(t, scanOp.GetWorkerPoolSize(), int32(2)) @@ -435,7 +445,8 @@ func TestTuneWorkerPoolSize(t *testing.T) { ctx := context.Background() wctx := workerpool.NewContext(ctx) pTbl := tbl.(table.PhysicalTable) - index := tables.NewIndex(pTbl.GetPhysicalID(), tbl.Meta(), idxInfo) + index, err := tables.NewIndex(pTbl.GetPhysicalID(), tbl.Meta(), idxInfo) + require.NoError(t, err) cfg, bd, err := ingest.CreateLocalBackend(context.Background(), store, realJob, false, false, 0) require.NoError(t, err) defer bd.Close() diff --git a/tests/realtikvtest/addindextest4/BUILD.bazel b/tests/realtikvtest/addindextest4/BUILD.bazel index e666f47b0edaf..b95fd3ec84fa0 100644 --- a/tests/realtikvtest/addindextest4/BUILD.bazel +++ b/tests/realtikvtest/addindextest4/BUILD.bazel @@ -17,9 +17,15 @@ go_test( "//pkg/errno", "//pkg/kv", "//pkg/meta/model", +<<<<<<< HEAD +======= + "//pkg/parser/ast", + "//pkg/table/tables/testutil", +>>>>>>> 8c2781681a4 (ddl,tables: only write the index when it meets partial index condition (#62762)) "//pkg/testkit", "//pkg/testkit/testfailpoint", "//pkg/util", + "//pkg/util/dbterror", "//tests/realtikvtest", "@com_github_pingcap_failpoint//:failpoint", "@com_github_stretchr_testify//assert", diff --git a/tests/realtikvtest/addindextest4/partial_index_test.go b/tests/realtikvtest/addindextest4/partial_index_test.go new file mode 100644 index 0000000000000..49e0013bbeae9 --- /dev/null +++ b/tests/realtikvtest/addindextest4/partial_index_test.go @@ -0,0 +1,239 @@ +// Copyright 2025 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package addindextest + +import ( + "context" + "testing" + + "github.com/pingcap/tidb/pkg/config/kerneltype" + "github.com/pingcap/tidb/pkg/domain" + "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/table/tables/testutil" + "github.com/pingcap/tidb/pkg/testkit" + "github.com/pingcap/tidb/pkg/util/dbterror" + "github.com/pingcap/tidb/tests/realtikvtest" + "github.com/stretchr/testify/require" +) + +func findIndex(t *testing.T, dom *domain.Domain, tableName string, indexName string) *model.IndexInfo { + tbl, err := dom.InfoSchema().TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr(tableName)) + require.NoError(t, err) + + idx := tbl.Meta().FindIndexByName(indexName) + return idx +} + +func validatePartialIndexExists(t *testing.T, dom *domain.Domain, tableName string, indexName string) { + idx := findIndex(t, dom, tableName, indexName) + require.NotNil(t, idx) + require.NotEmpty(t, idx.ConditionExprString) +} + +func TestPartialIndexDDL(t *testing.T) { + store, dom := realtikvtest.CreateMockStoreAndDomainAndSetup(t) + tk := testkit.NewTestKit(t, store) + + t.Run("TestCreatePartialIndex", func(t *testing.T) { + tk.MustExec("use test") + tk.MustExec("create table t1 (col1 int primary key, col2 int, key idx(col2) where col1 > 100);") + defer tk.MustExec("drop table if exists t1;") + validatePartialIndexExists(t, dom, "t1", "idx") 
+ + tk.MustExec("create table t2 (col1 int primary key, col2 int);") + defer tk.MustExec("drop table if exists t2;") + tk.MustExec("create index idx on t2(col2) where col1 > 100;") + validatePartialIndexExists(t, dom, "t2", "idx") + + tk.MustExec("create table t3 (col1 int primary key, col2 int);") + defer tk.MustExec("drop table if exists t3;") + tk.MustExec("alter table t3 add index idx(col2) where col1 > 100;") + validatePartialIndexExists(t, dom, "t3", "idx") + + tk.MustExec("create table t4 like t1") + defer tk.MustExec("drop table if exists t4;") + validatePartialIndexExists(t, dom, "t4", "idx") + }) + + t.Run("TestValidationInPartialIndex", func(t *testing.T) { + tk.MustExec("use test") + tk.MustExec("create table t(col1 int primary key, col2 int, col3 varchar(255), col4 int as (col2 + 1));") + defer tk.MustExec("drop table if exists t;") + + type testCase struct { + idxDef string + errorCode int + } + testCases := []testCase{ + {"t(col2) where col2 = 1;", 0}, + {"t(col2) where col1 != 1;", 0}, + {"t(col2) where col1 > 1;", 0}, + {"t(col2) where col1 IS NULL;", 0}, + {"t(col2) where col1 IS NOT NULL;", 0}, + {"t(col2) where col1 IN (1,2,3,4,5);", 8200}, + {"t(col2) where col1 LIKE '1%';", 8200}, + {"t(col2) where col1 > col2;", 8200}, + {"t(col2) where col1 = NOW();", 8200}, + {"t(col2) where col1 = (select 1);", 8200}, + {"t(col2) where col4 = 4;", 8200}, + } + for _, tc := range testCases { + t.Run(tc.idxDef, func(t *testing.T) { + sql := "create index idx on " + tc.idxDef + + if tc.errorCode != 0 { + tk.MustGetErrCode(sql, tc.errorCode) + return + } + + tk.MustExec(sql) + validatePartialIndexExists(t, dom, "t", "idx") + tk.MustExec("drop index idx on t;") + }) + } + }) + + t.Run("TestIndexManagementForPartialIndex", func(t *testing.T) { + tk.MustExec("use test") + tk.MustExec("create table t(col1 int primary key, col2 int, col3 int);") + defer tk.MustExec("drop table if exists t;") + + tk.MustExec("create index idx on t(col3) where col2 = 1;") + 
validatePartialIndexExists(t, dom, "t", "idx") + tk.MustExec("alter table t rename index idx to idx2;") + validatePartialIndexExists(t, dom, "t", "idx2") + + tk.MustExec("alter table t change column col3 col4 int;") + validatePartialIndexExists(t, dom, "t", "idx2") + idx := findIndex(t, dom, "t", "idx2") + require.Equal(t, "col4", idx.Columns[0].Name.O) + tk.MustExec("alter table t modify column col4 int unsigned;") + validatePartialIndexExists(t, dom, "t", "idx2") + idx = findIndex(t, dom, "t", "idx2") + require.Equal(t, "col4", idx.Columns[0].Name.O) + tk.MustExec("alter table t drop column col4;") + idx = findIndex(t, dom, "t", "idx2") + require.Nil(t, idx) + + tk.MustExec("create index idx on t(col2) where col2 = 1;") + validatePartialIndexExists(t, dom, "t", "idx") + tk.MustExec("drop index idx on t;") + idx = findIndex(t, dom, "t", "idx") + require.Nil(t, idx) + }) + + t.Run("TestManipulateColumnReferencedByPartialIndex", func(t *testing.T) { + tk.MustExec("use test") + tk.MustExec("create table t(col1 int primary key, col2 int, col3 int);") + + tk.MustExec("create index idx on t(col3) where col2 = 1;") + validatePartialIndexExists(t, dom, "t", "idx") + + tk.MustGetDBError("alter table t drop column col2;", dbterror.ErrModifyColumnReferencedByPartialCondition) + tk.MustGetDBError("alter table t change column col2 col4 int;", dbterror.ErrModifyColumnReferencedByPartialCondition) + tk.MustGetDBError("alter table t modify column col2 int unsigned;", dbterror.ErrModifyColumnReferencedByPartialCondition) + + tk.MustExec("drop table t;") + tk.MustExec("create table t(col1 int primary key, col2 int, col3 int, key t(col3) where col2 = 1);") + + tk.MustGetDBError("alter table t drop column col2;", dbterror.ErrModifyColumnReferencedByPartialCondition) + tk.MustGetDBError("alter table t change column col2 col4 int;", dbterror.ErrModifyColumnReferencedByPartialCondition) + tk.MustGetDBError("alter table t modify column col2 int unsigned;", 
dbterror.ErrModifyColumnReferencedByPartialCondition) + tk.MustExec("drop table t;") + }) + + t.Run("TestPartialIndexCanOnlyBeCreatedWithFastReorg", func(t *testing.T) { + if kerneltype.IsNextGen() { + t.Skip("Cannot disable fast-reorg in next-gen kernel") + } + + tk.MustExec("use test;") + tk.MustExec("create table t (a int, b int, c int, primary key (a))") + defer tk.MustExec("drop table if exists t;") + tk.MustExec("insert into t(a, b, c) values (1, 2, 3), (2, 3, 4), (3, 4, 5)") + + // Disable fast-reorg + tk.MustExec("set global tidb_ddl_enable_fast_reorg = 0") + tk.MustGetDBError("alter table t add index idx1(a) where c > 3", dbterror.ErrUnsupportedAddPartialIndex) + tk.MustExec("set global tidb_ddl_enable_fast_reorg = 1") + + tk.MustExec("alter table t add index idx0(a)") + testutil.CheckIndexKVCount(t, tk, dom, "t", "idx0", 3) + jobs := tk.MustQuery("admin show ddl jobs 1").Rows() + require.Equal(t, jobs[0][7], "3") + }) + + t.Run("TestAddPartialIndex", func(t *testing.T) { + tk.MustExec("use test;") + tk.MustExec("create table t (a int, b int, c int, primary key (a))") + defer tk.MustExec("drop table if exists t;") + tk.MustExec("insert into t(a, b, c) values (1, 2, 3), (2, 3, 4), (3, 4, 5)") + + // Partial index with proper condition + tk.MustExec("alter table t add index idx1(a) where c >= 5") + testutil.CheckIndexKVCount(t, tk, dom, "t", "idx1", 1) + jobs := tk.MustQuery("admin show ddl jobs 1").Rows() + require.Equal(t, jobs[0][7], "3") + + // Partial index with condition that no row meets + tk.MustExec("alter table t add index idx2(a) where c >= 6") + testutil.CheckIndexKVCount(t, tk, dom, "t", "idx2", 0) + jobs = tk.MustQuery("admin show ddl jobs 1").Rows() + require.Equal(t, jobs[0][7], "3") + + // Multi-schema change add multiple partial indexes + tk.MustExec("alter table t add index idx5(a) where c >= 4, add index idx6(a) where c >= 5") + testutil.CheckIndexKVCount(t, tk, dom, "t", "idx5", 2) + testutil.CheckIndexKVCount(t, tk, dom, "t", "idx6", 1) 
+ jobs = tk.MustQuery("admin show ddl jobs 1").Rows() + require.Equal(t, jobs[1][7], "3") + + // Pushdown is disabled for `not` function, so the DDL cannot pushdown + tk.MustExec("INSERT INTO mysql.expr_pushdown_blacklist VALUES('not','tikv','');") + tk.MustExec("ADMIN reload expr_pushdown_blacklist;") + tk.MustExec("alter table t add index idx7(a) where b is not null;") + testutil.CheckIndexKVCount(t, tk, dom, "t", "idx7", 3) + tk.MustExec("DELETE FROM mysql.expr_pushdown_blacklist WHERE name='not' AND store_type='tikv';") + + // Multi-schema change add multiple indexes, including partial index and normal index + tk.MustExec("alter table t add index idx8(a), add index idx9(a) where c >= 5") + testutil.CheckIndexKVCount(t, tk, dom, "t", "idx8", 3) + testutil.CheckIndexKVCount(t, tk, dom, "t", "idx9", 1) + jobs = tk.MustQuery("admin show ddl jobs 1").Rows() + require.Equal(t, jobs[1][7], "3") + + // Create index on table with `_tidb_rowid` column + tk.MustExec("create table t1 (a int, b int, c int)") + tk.MustExec("insert into t1(a, b, c) values (1, 2, 3), (2, 3, 4), (3, 4, 5)") + tk.MustExec("alter table t1 add index idx1(a) where a > 1") + testutil.CheckIndexKVCount(t, tk, dom, "t1", "idx1", 2) + + // Create normal index, the row count is still correct. + tk.MustExec("alter table t1 add index idx2(a)") + testutil.CheckIndexKVCount(t, tk, dom, "t1", "idx2", 3) + jobs = tk.MustQuery("admin show ddl jobs 1").Rows() + require.Equal(t, jobs[0][7], "3") + }) + + t.Run("TestValidateColumnExistsInAddIndex", func(t *testing.T) { + tk.MustExec("use test;") + tk.MustExec("create table t (a int, b int);") + defer tk.MustExec("drop table if exists t;") + tk.MustExec("alter table t add index idx_b(b) where a = 1;") + tk.MustGetDBError("alter table t add index idx_b_2(b) where c = 1;", + dbterror.ErrUnsupportedAddPartialIndex) + }) +}