diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java new file mode 100644 index 000000000000..981227491773 --- /dev/null +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.core.starter.flink.execution; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import org.apache.seatunnel.api.common.JobContext; +import org.apache.seatunnel.api.source.SupportSchemaEvolution; +import org.apache.seatunnel.translation.flink.schema.SchemaOperator; +import org.apache.seatunnel.translation.flink.schema.SchemaOperator13; + +import java.net.URL; +import java.util.List; + +/** + * Flink 1.13-specific source execution processor. Shadows the common {@code SourceExecuteProcessor} + * at runtime (same package, same class name) to provide two Flink 1.13-specific behaviours without + * using reflection: + * + *
The base {@link SchemaOperator} carries none of this timer infrastructure; Flink 1.15 and + * later use that base class directly because checkpointing behaves correctly there. + */ +@Slf4j +public class SchemaOperator13 extends SchemaOperator { + + /** + * Guards against double-registration. All accesses happen on the Flink task thread + * (processElement, timer callbacks, notifyCheckpointComplete) + */ + private boolean fallbackTimerPending = false; + + public SchemaOperator13(String jobId, SupportSchemaEvolution source, Config pluginConfig) { + super(jobId, source, pluginConfig); + } + + /** + * Registers a processing-time timer that will call {@link #handleFallbackTimerOnTaskThread()} + * on the Flink task thread after {@link #CHECKPOINT_STALL_TIMEOUT_MS} milliseconds. + * + *
Using {@link ProcessingTimeService#registerTimer} instead of a background {@code + * ScheduledExecutorService} achieves two goals: + * + *
If a timer is already pending this call is a no-op to prevent duplicate firings. + */ + @Override + protected void scheduleFallbackTimer() { + if (fallbackTimerPending) { + return; + } + fallbackTimerPending = true; + + ProcessingTimeService pts = getProcessingTimeService(); + long fireAt = pts.getCurrentProcessingTime() + CHECKPOINT_STALL_TIMEOUT_MS; + + pts.registerTimer( + fireAt, + (ProcessingTimeCallback) + timestamp -> { + fallbackTimerPending = false; + try { + handleFallbackTimerOnTaskThread(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + log.error( + "Fallback schema-change timer interrupted for job {}", + jobId, + e); + } + }); + + log.debug( + "Registered Flink processing-time fallback timer to fire in {}ms for job {}", + CHECKPOINT_STALL_TIMEOUT_MS, + jobId); + } +} diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/schema/SchemaOperator.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/schema/SchemaOperator.java index 0f993dcda0ff..0ce26072b3fa 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/schema/SchemaOperator.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/schema/SchemaOperator.java @@ -1,12 +1,12 @@ /* * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with + * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -54,7 +54,7 @@ /** * Operator placed after the source to handle schema evolution. * - *
schema change events are NOT processed synchronously in {@link #processElement}. Instead, they + *
Schema change events are NOT processed synchronously in {@link #processElement}. Instead, they * are buffered and deferred until an additional checkpoint cycle has completed after the first * checkpoint that observed the pending DDL. This wait ensures that when the sink executes ALTER * TABLE, all XA transactions from prior checkpoint cycles have been fully committed by the {@code @@ -64,6 +64,13 @@ * *
Per checkpoint cycle, at most ONE schema change is applied. If multiple DDLs arrive between * two checkpoints, they are processed across successive checkpoint cycles. + * + *
Flink 1.13 cannot continue checkpointing after some source subtasks have finished. When
+ * high-parallelism CDC jobs hit that condition, pending schema changes would otherwise stay blocked
+ * forever. Subclasses may override {@link #scheduleFallbackTimer()} to register a version-specific
+ * timer that detects the stall and re-enters the task thread via {@link
+ * #handleFallbackTimerOnTaskThread()} so the deferred DDL can still be applied safely. The base
+ * implementation is a no-op, keeping the common module free of version-specific overhead.
*/
@Slf4j
public class SchemaOperator extends AbstractStreamOperator Safety fence: the DDL is applied only when at least one checkpoint has already completed
+ * after the schema event ({@code firstSeenCheckpointId >= 0}). This preserves the guarantee
+ * that XA transactions from the earlier checkpoint cycle have finished before ALTER TABLE runs.
+ * If that fence has not been crossed yet, the fallback reschedules itself by calling {@link
+ * #scheduleFallbackTimer()} and returns without applying anything.
+ */
+ protected void handleFallbackTimerOnTaskThread() throws InterruptedException {
+ if (!schemaChangePending || pendingQueue.isEmpty()) {
+ return;
+ }
+
+ if (lastCheckpointCompletedMs > 0
+ && System.currentTimeMillis() - lastCheckpointCompletedMs
+ < CHECKPOINT_STALL_TIMEOUT_MS) {
+ scheduleFallbackTimer();
+ return;
+ }
+
+ BufferedRecord head = advancePastDataRecords();
+ if (head == null) {
+ return;
+ }
+
+ if (firstSeenCheckpointId < 0) {
+ log.info(
+ "Fallback timer fired but no checkpoint has completed after schema event "
+ + "for table {} (epoch {}). Rescheduling fallback to preserve "
+ + "checkpoint-completion safety fence.",
+ head.schemaEvent.tableIdentifier(),
+ head.schemaEvent.getCreatedTime());
+ scheduleFallbackTimer();
+ return;
+ }
+
+ log.warn(
+ "Checkpoint stall detected after first post-DDL checkpoint {}. "
+ + "Applying deferred DDL for table {} (epoch {}) via fallback timer. "
+ + "Note: data committed via normal Flink checkpoint lifecycle may be "
+ + "delayed until checkpoints resume.",
+ firstSeenCheckpointId,
+ head.schemaEvent.tableIdentifier(),
+ head.schemaEvent.getCreatedTime());
+
+ applyNextPendingSchemaChange();
+ }
+
+ /**
+ * Schedules a fallback timer that will call {@link #handleFallbackTimerOnTaskThread()} if
+ * checkpoints stall before the pending schema change can be applied.
+ *
+ * The base implementation is a no-op: version-specific subclasses (e.g. {@code
+ * SchemaOperator13}) override this to register a timer via {@code ProcessingTimeService},
+ * keeping the common module free of version-specific timer infrastructure and reflection.
+ */
+ protected void scheduleFallbackTimer() {
+ // no-op by default; overridden in version-specific subclasses
+ }
+
+ private BufferedRecord advancePastDataRecords() {
+ BufferedRecord head = pendingQueue.peek();
+ while (head != null && !head.isSchemaChange) {
+ output.collect(new StreamRecord<>(head.row, head.timestamp));
+ pendingQueue.poll();
+ head = pendingQueue.peek();
+ }
+ if (head == null) {
+ schemaChangePending = false;
+ firstSeenCheckpointId = -1L;
+ }
+ return head;
+ }
+
+ private void applyNextPendingSchemaChange() throws InterruptedException {
+ BufferedRecord head = pendingQueue.peek();
+ if (head == null || !head.isSchemaChange) {
+ return;
+ }
+
+ SchemaChangeEvent event = head.schemaEvent;
+ TableIdentifier tableId = event.tableIdentifier();
+ long eventTime = event.getCreatedTime();
+
if (lastProcessedEventTime != null && eventTime <= lastProcessedEventTime) {
log.warn(
"Skipping outdated schema change event (epoch {} <= last processed {})",
@@ -294,7 +400,6 @@ public void notifyCheckpointComplete(long checkpointId) throws Exception {
"Schema change for table {} (epoch {}) confirmed by all sink subtasks.",
tableId,
eventTime);
-
pendingQueue.poll();
firstSeenCheckpointId = -1L;
@@ -323,6 +428,7 @@ private void drainDataUntilNextSchemaChange() {
"Released {} buffered data records. Another schema change pending, "
+ "waiting for next checkpoint.",
released);
+ scheduleFallbackTimer();
return;
}
pendingQueue.poll();
@@ -474,11 +580,6 @@ private void sendSchemaChangeEventToDownstream(SchemaChangeEvent schemaChangeEve
output.collect(new StreamRecord<>(broadcastRow));
}
- @Override
- public void close() throws Exception {
- super.close();
- }
-
static class BufferedRecord {
final boolean isSchemaChange;
final SeaTunnelRow row;
diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/schema/coordinator/LocalSchemaCoordinator.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/schema/coordinator/LocalSchemaCoordinator.java
index 8d99f433a391..3cd96d43dc6f 100644
--- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/schema/coordinator/LocalSchemaCoordinator.java
+++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/schema/coordinator/LocalSchemaCoordinator.java
@@ -41,6 +41,12 @@
* Local coordinator for schema change synchronization. This coordinator only manages temporary
* communication between SchemaOperator and sink subtasks. All persistent state is managed by
* BroadcastSchemaSinkOperator in Flink State.
+ *
+ * Schema changes (DDL like ALTER TABLE) are database-level operations that only need to be
+ * executed once. In Flink's parallel execution model, SchemaOperator sends schema change events via
+ * output.collect() which routes to only ONE downstream subtask based on partitioning. Therefore,
+ * this coordinator completes schema change requests when ANY single subtask successfully applies
+ * the change, rather than waiting for all subtasks.
*/
@Slf4j
public class LocalSchemaCoordinator {
@@ -138,24 +144,27 @@ public void unregisterSinkSubtask(int subtaskId) {
remaining,
jobId);
+ // Check if any pending requests can now be completed
+ // (Since we only need 1 ACK for DDL, this typically won't change anything,
+ // but we keep it for edge cases where all subtasks close before any ACK)
for (Map.Entry The test invokes the handler directly (as if a processing-time timer fired) to keep the
+ * unit test independent of Flink's timer infrastructure. In production, the handler is called
+ * by {@link SchemaOperator13#scheduleFallbackTimer()} via {@code
+ * ProcessingTimeService.registerTimer}.
+ *
+ * The base {@link SchemaOperator#scheduleFallbackTimer()} is a no-op; this test verifies
+ * only the handler logic, not the scheduling mechanism.
+ */
+ @Test
+ void testFallbackTimerRespectsCheckpointSafetyFence() throws Exception {
+ LocalSchemaCoordinator coordinator = Mockito.mock(LocalSchemaCoordinator.class);
+ Mockito.when(
+ coordinator.requestSchemaChange(
+ Mockito.any(), Mockito.anyLong(), Mockito.anyLong()))
+ .thenReturn(true);
+
+ OperatorTestContext context = createOperator(false);
+ setField(context.operator, "coordinator", coordinator);
+
+ AlterTableAddColumnEvent event = createSchemaChangeEvent();
+ SeaTunnelRow row = createDataRow("row-released-after-fallback");
+
+ context.operator.processElement(new StreamRecord<>(createSchemaRow(event), 400L));
+ context.operator.processElement(new StreamRecord<>(row, 401L));
+
+ // Simulate timer firing before any checkpoint has completed (firstSeenCheckpointId < 0).
+ // The handler must NOT apply the DDL — it must call scheduleFallbackTimer() to wait for
+ // the checkpoint-completion safety fence (guards XA/MDL conflicts).
+ invokeNoArgMethod(context.operator, "handleFallbackTimerOnTaskThread");
+
+ assertTrue(context.output.records.isEmpty());
+ assertTrue(getBooleanField(context.operator, "schemaChangePending"));
+ assertEquals(2, getPendingQueue(context.operator).size());
+ assertEquals(-1L, getLongField(context.operator, "firstSeenCheckpointId"));
+ Mockito.verifyNoInteractions(coordinator);
+
+ // Complete the first post-DDL checkpoint — sets firstSeenCheckpointId, not yet safe to
+ // apply (need one additional round, so notifyCheckpointComplete stops here).
+ context.operator.notifyCheckpointComplete(40L);
+
+ assertTrue(context.output.records.isEmpty());
+ assertEquals(40L, getLongField(context.operator, "firstSeenCheckpointId"));
+ assertTrue(getBooleanField(context.operator, "schemaChangePending"));
+ Mockito.verifyNoInteractions(coordinator);
+
+ // Simulate checkpoint stall: move lastCheckpointCompletedMs into the past beyond
+ // CHECKPOINT_STALL_TIMEOUT_MS (15 s). This mirrors the Flink 1.13 behaviour where
+ // high-parallelism CDC jobs stop checkpointing after some source subtasks finish.
+ setField(
+ context.operator,
+ "lastCheckpointCompletedMs",
+ System.currentTimeMillis() - 20_000L);
+
+ // Simulate timer firing again. firstSeenCheckpointId >= 0 and checkpoint has stalled,
+ // so the safety fence is satisfied — the DDL can now be applied.
+ invokeNoArgMethod(context.operator, "handleFallbackTimerOnTaskThread");
+
+ assertEquals(2, context.output.records.size());
+ assertSchemaBroadcast(context.output.records.get(0), event);
+ assertEquals(row, context.output.records.get(1).getValue());
+ assertFalse(getBooleanField(context.operator, "schemaChangePending"));
+ assertTrue(getPendingQueue(context.operator).isEmpty());
+ Mockito.verify(coordinator)
+ .requestSchemaChange(event.tableIdentifier(), event.getCreatedTime(), 300_000L);
+ }
+
private static OperatorTestContext createOperator(boolean restored) throws Exception {
return createOperator(new OperatorStateStoreStub(), restored);
}
@@ -285,6 +355,12 @@ private static void setField(Object target, Class> owner, String fieldName, Ob
field.set(target, value);
}
+ private static Object invokeNoArgMethod(Object target, String methodName) throws Exception {
+ java.lang.reflect.Method method = target.getClass().getDeclaredMethod(methodName);
+ method.setAccessible(true);
+ return method.invoke(target);
+ }
+
private static Field findField(Class> type, String fieldName) throws NoSuchFieldException {
Class> current = type;
while (current != null) {
diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/test/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriterTest.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/test/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriterTest.java
index 50be1a39d279..943d48b43d8f 100644
--- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/test/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriterTest.java
+++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/test/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriterTest.java
@@ -20,6 +20,10 @@
import org.apache.seatunnel.api.common.metrics.MetricsContext;
import org.apache.seatunnel.api.event.EventListener;
import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.api.sink.SupportSchemaEvolutionSinkWriter;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.schema.event.AlterTableAddColumnEvent;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.junit.jupiter.api.Assertions;
@@ -28,7 +32,9 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
class FlinkSinkWriterTest {
@@ -83,13 +89,54 @@ void testSnapshotStateWithoutPrepareCommitFallsBack() throws Exception {
Assertions.assertEquals("state-3", states.get(0).getState());
}
+ @Test
+ void testSchemaChangeEventDoesNotForceCommit() throws Exception {
+ SchemaAwareRecordingSinkWriter delegate = new SchemaAwareRecordingSinkWriter();
+ RecordingContext context = new RecordingContext();
+
+ FlinkSinkWriter
- *
@@ -210,20 +232,14 @@ private TableIdentifier getPendingSchemaTableIdentifier() {
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
super.notifyCheckpointComplete(checkpointId);
+ lastCheckpointCompletedMs = System.currentTimeMillis();
if (!schemaChangePending || pendingQueue.isEmpty()) {
return;
}
- BufferedRecord head = pendingQueue.peek();
- while (head != null && !head.isSchemaChange) {
- output.collect(new StreamRecord<>(head.row, head.timestamp));
- pendingQueue.poll();
- head = pendingQueue.peek();
- }
+ BufferedRecord head = advancePastDataRecords();
if (head == null) {
- schemaChangePending = false;
- firstSeenCheckpointId = -1L;
return;
}
@@ -266,6 +282,96 @@ public void notifyCheckpointComplete(long checkpointId) throws Exception {
tableId,
eventTime);
+ applyNextPendingSchemaChange();
+ }
+
+ /**
+ * Handles a checkpoint-stall fallback on the task thread. Must be called from the Flink task
+ * thread (e.g. via {@code ProcessingTimeService.registerTimer} callback) to keep {@code
+ * output.collect} and operator state accesses thread-safe.
+ *
+ *