apache · stevenschlansker · May 28, 2026 · May 28, 2026 · Jun 26, 2026 · Jun 26, 2026
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fory.benchmark;
+
+import java.util.Arrays;
+import org.apache.fory.format.annotation.ForyVersion;
+import org.apache.fory.format.encoder.Encoders;
+import org.apache.fory.format.encoder.RowEncoder;
+import org.apache.fory.logging.Logger;
+import org.apache.fory.logging.LoggerFactory;
+import org.openjdk.jmh.Main;
+import org.openjdk.jmh.annotations.Benchmark;
+
+/**
+ * Row-codec schema-evolution throughput and allocation. Pair with the JMH gc profiler ({@code -prof
+ * gc}) to read {@code gc.alloc.rate.norm} (bytes per op). Two comparisons matter: {@code
+ * currentDecode} vs {@code olderDecode} shows that decoding an older payload through a projection
+ * codec allocates no more than decoding the current schema, because each projection holds its
+ * historical schema's row layout (no per-decode rebuild); and the {@code *NoEvolution} benchmarks
+ * vs their evolution-on counterparts show the steady-state cost of enabling {@code
+ * withSchemaEvolution()} when reading and writing current-version data.
+ */
+public class SchemaEvolutionSuite {
+  private static final Logger LOG = LoggerFactory.getLogger(SchemaEvolutionSuite.class);
+
+  /** JMH arguments used when {@link #main(String[])} is launched without any of its own. */
+  private static final String DEFAULT_COMMAND_LINE =
+      "org.apache.fory.*SchemaEvolutionSuite.* -f 3 -wi 3 -i 3 -t 1 -w 2s -r 2s -prof gc -rf csv";
+
+  /** First version of the bean: no versioned fields. */
+  public static class PersonV1 {
+    String name;
+    int age;
+  }
+
+  /** Current version of the bean: the V1 fields plus {@code email}, introduced in version 2. */
+  public static class PersonV2 {
+    String name;
+    int age;
+
+    @ForyVersion(since = 2)
+    String email;
+  }
+
+  // Codecs built with withSchemaEvolution(). The V2 codecs are the ones under test; the V1 codecs
+  // exist only to write the "older" payloads that the V2 codecs decode in the olderDecode
+  // benchmarks. Standard and compact encodings are both covered so that olderDecode can be
+  // compared with currentDecode in each format.
+  private static final RowEncoder<PersonV1> v1Codec =
+      Encoders.buildBeanCodec(PersonV1.class).withSchemaEvolution().build().get();
+  private static final RowEncoder<PersonV2> v2Codec =
+      Encoders.buildBeanCodec(PersonV2.class).withSchemaEvolution().build().get();
+  private static final RowEncoder<PersonV1> v1CompactCodec =
+      Encoders.buildBeanCodec(PersonV1.class).compactEncoding().withSchemaEvolution().build().get();
+  private static final RowEncoder<PersonV2> v2CompactCodec =
+      Encoders.buildBeanCodec(PersonV2.class).compactEncoding().withSchemaEvolution().build().get();
+
+  // Codecs for the same V2 bean built without withSchemaEvolution(). They back the *NoEvolution
+  // benchmarks, which serve as baselines for the cost of the flag when only current-version data
+  // is read and written.
+  private static final RowEncoder<PersonV2> v2PlainCodec =
+      Encoders.buildBeanCodec(PersonV2.class).build().get();
+  private static final RowEncoder<PersonV2> v2PlainCompactCodec =
+      Encoders.buildBeanCodec(PersonV2.class).compactEncoding().build().get();
+
+  // Fixtures. These initializers depend on the codecs above, so they must stay below them.
+  private static final PersonV2 person = samplePersonV2();
+  private static final byte[] currentBytes = v2Codec.encode(person);
+  private static final byte[] olderBytes = v1Codec.encode(samplePersonV1());
+  private static final byte[] currentCompactBytes = v2CompactCodec.encode(person);
+  private static final byte[] olderCompactBytes = v1CompactCodec.encode(samplePersonV1());
+  private static final byte[] plainBytes = v2PlainCodec.encode(person);
+  private static final byte[] plainCompactBytes = v2PlainCompactCodec.encode(person);
+
+  /** Creates the V1 fixture; it carries the same name and age as the V2 fixture. */
+  private static PersonV1 samplePersonV1() {
+    PersonV1 older = new PersonV1();
+    older.name = "Ada Lovelace";
+    older.age = 36;
+    return older;
+  }
+
+  /** Creates the V2 fixture with every field, including the version-2 {@code email}, populated. */
+  private static PersonV2 samplePersonV2() {
+    PersonV2 current = new PersonV2();
+    current.name = "Ada Lovelace";
+    current.age = 36;
+    current.email = "ada@example.com";
+    return current;
+  }
+
+  // ---- Standard encoding: each evolution-on benchmark is followed by its evolution-off baseline.
+
+  /** Encodes the V2 fixture with the evolution-enabled codec. */
+  @Benchmark
+  public Object encode() {
+    return v2Codec.encode(person);
+  }
+
+  /** Baseline for {@link #encode()}: same bean, codec built without schema evolution. */
+  @Benchmark
+  public Object encodeNoEvolution() {
+    return v2PlainCodec.encode(person);
+  }
+
+  /** Decodes a payload written by the same evolution-enabled V2 codec. */
+  @Benchmark
+  public Object currentDecode() {
+    return v2Codec.decode(currentBytes);
+  }
+
+  /** Baseline for {@link #currentDecode()}: payload and codec both built without evolution. */
+  @Benchmark
+  public Object currentDecodeNoEvolution() {
+    return v2PlainCodec.decode(plainBytes);
+  }
+
+  /** Decodes a payload written by the V1 codec using the evolution-enabled V2 codec. */
+  @Benchmark
+  public Object olderDecode() {
+    return v2Codec.decode(olderBytes);
+  }
+
+  // ---- Compact encoding: same pairing as above.
+
+  /** Encodes the V2 fixture with the evolution-enabled compact codec. */
+  @Benchmark
+  public Object compactEncode() {
+    return v2CompactCodec.encode(person);
+  }
+
+  /** Baseline for {@link #compactEncode()}: compact codec built without schema evolution. */
+  @Benchmark
+  public Object compactEncodeNoEvolution() {
+    return v2PlainCompactCodec.encode(person);
+  }
+
+  /** Decodes a payload written by the same evolution-enabled compact V2 codec. */
+  @Benchmark
+  public Object compactCurrentDecode() {
+    return v2CompactCodec.decode(currentCompactBytes);
+  }
+
+  /** Baseline for {@link #compactCurrentDecode()}: compact payload and codec without evolution. */
+  @Benchmark
+  public Object compactCurrentDecodeNoEvolution() {
+    return v2PlainCompactCodec.decode(plainCompactBytes);
+  }
+
+  /** Decodes a payload written by the compact V1 codec using the compact V2 codec. */
+  @Benchmark
+  public Object compactOlderDecode() {
+    return v2CompactCodec.decode(olderCompactBytes);
+  }
+
+  /**
+   * Runs the suite through the JMH launcher.
+   *
+   * @param args JMH command-line arguments; when empty, the built-in default command line (which
+   *     includes the gc profiler) is used instead
+   * @throws Exception if the JMH launcher fails
+   */
+  public static void main(String[] args) throws Exception {
+    String[] jmhArgs = args.length == 0 ? DEFAULT_COMMAND_LINE.split(" ") : args;
+    LOG.info("command line: {}", Arrays.toString(jmhArgs));
+    Main.main(jmhArgs);
+  }
+}
@@ -187,6 +187,116 @@ std::string str = bar10->get_string(0);
 | Memory usage         | Full object graph in memory   | Only accessed fields            |
 | Suitable for         | Small objects, full access    | Large objects, selective access |
 
+## Schema evolution
+
+Enable `.withSchemaEvolution()` on a row, array, or map codec builder to read payloads written
+by older versions of the same bean. Writing always uses the current version; reading detects
+the payload's version from a strict hash at the head of the payload. Java only.
+
+Annotate fields added after v1 with `@ForyVersion(since = N)`:
+
+```java
+@Data
+public class Person {
+  String name;
+  int age;
+
+  @ForyVersion(since = 2)
+  String email;
+}
+```
+
+A v1 payload (with `name` and `age` only) decodes to a `Person` whose `email` is `null`.
+Primitive fields added later default to `0`, `0.0`, or `false`. Unannotated fields are treated
+as present from the first version, so a class can adopt versioning by annotating only the fields
+added after v1.
+
+For a record, the absent component's default is passed to the canonical constructor, so a
+constructor that rejects `null` for a reference component added in a later version throws when
+decoding an older payload. Let the constructor tolerate the missing value, for example by
+normalizing `null` to a default:
+
+```java
+public record Person(String name, @ForyVersion(since = 2) String email) {
+  public Person {
+    if (email == null) {
+      email = "";
+    }
+  }
+}
+```
+
+Remove a field by deleting the Java member and declaring it on a nested history interface as a
+method annotated with `@ForyVersion(until = N)`. The method's return type carries any
+parameterized type information from the original field.
+
+```java
+@Data
+@ForySchema(removedFields = Person.History.class)
+public class Person {
+  String name;
+
+  @ForyVersion(since = 2)
+  String email;
+
+  interface History {
+    @ForyVersion(until = 3)
+    int age();
+
+    @ForyVersion(until = 5)
+    List<String> tags();
+  }
+}
+```
+
+The history method name matches the original live descriptor name. For field-backed beans
+(Lombok `@Data`, records, or plain classes with a backing field) that is the field name
+(`age`, `tags`). For interface beans, where the live member is a getter with no backing field,
+it is the method name (`getAge`).
+
+### Wire format and limitations
+
+Producers and consumers must agree on the `withSchemaEvolution()` flag — they are not
+wire-compatible otherwise. Row payloads always carry an 8-byte hash slot; under evolution its
+value is the strict hash (which includes field name and nullability), so a flag-mismatched
+peer fails loudly with `ClassNotCompatibleException`. Arrays and maps of bean elements prepend
+an 8-byte strict-hash prefix under evolution and no prefix otherwise; an evolution-on consumer
+reading evolution-off bytes also fails with `ClassNotCompatibleException`, but the reverse
+direction (evolution-off consumer, evolution-on bytes) is undefined.
+
+To adopt the flag on an existing deployment, enable `withSchemaEvolution()` on both sides in a
+release that changes no schema, then start evolving schemas only once every peer is on the
+evolution-enabled build. Turning the flag on and changing a schema in the same release strands
+any peer that has not yet upgraded.
+
+Cross-language consumers (Python, C++) cannot read evolution-enabled payloads.
+
+A reader selects the matching layout from the 8-byte strict hash on the payload. The hash includes
+field names and nullability and is checked for collisions across a bean's own versions when the
+codec is built, but it is still a 64-bit value: a payload whose hash coincides with one of the
+reader's historical layouts is decoded against that layout. This is the same hash-based dispatch
+the row format has always used, so feeding a codec bytes it was not built for has undefined results
+whether or not evolution is enabled. Only hand a codec payloads produced for the same bean.
+
+Nested evolution works to arbitrary depth and places no restriction on shape: a versioned bean
+may contain versioned beans that themselves contain versioned beans, the same versioned bean
+class may back more than one field, and fields typed as a non-evolving bean, a list, or a map are
+unrestricted. Each nesting level is routed to the correct historical layout. A versioned bean may
+be used as a map key as well as a map value, and the key and value evolve independently. This
+holds wherever the map appears: as the codec's top-level type, nested inside a bean field, or
+reached through a top-level array or map (such as `List<Map<KeyBean, ValueBean>>`), and a single
+map may evolve more than one distinct bean class across its key and value. A top-level map carries
+its own hash identifying both layouts together; a map nested inside an array, another map, or a
+bean field has its layouts folded into the enclosing payload's hash.
+
+When a versioned bean contains other versioned beans, the reader can read one projection layout per
+combination of versions across the composition. A reader compiles a combination's codec the first
+time it decodes a payload at that combination, so the cost tracks the historical versions you
+actually receive, not the number you could in principle define. A map whose key and value both
+evolve combines their versions the same way. Retiring an entry from a bean's `History` interface
+once you no longer read payloads from that range stops the reader from accepting those payloads; it
+is purely a read-side decision, and the writer always uses the current schema.
+
 ## Related Topics
 
 - [Xlang Serialization](xlang-serialization.md) - xlang mode

@@ -343,6 +343,16 @@ if (fixed_width % 8 == 0):
 
 ---
 
+## Schema Evolution (Java Only)
+
+Schema evolution lets a codec read payloads written by older versions of the same bean. It is implemented in Java only and does not change the cross-language wire contract above; producer and consumer must agree on whether it is enabled.
+
+The Java encoder frames a row payload with a leading 8-byte schema-hash word. When evolution is enabled, that word holds a stricter hash that also distinguishes field names and nullability; otherwise it holds the format's default schema hash. Array and map payloads carry no hash word when evolution is disabled; under evolution they gain an 8-byte strict-hash prefix. A map's prefix is a single hash that identifies the key and value layouts together, so a map key and value evolve independently while the payload still carries one hash.
+
+See the [Java row format guide](../guide/java/row-format.md#schema-evolution) for usage, annotations, and limitations.
+
+---
+
 ## Common Specifications
 
 The following specifications apply to both standard and compact formats.

@@ -966,7 +966,11 @@ private static boolean isSupported(TypeRef<?> typeRef, TypeResolutionContext ctx
         return false;
       }
       Tuple2<TypeRef<?>, TypeRef<?>> mapKeyValueType = getMapKeyValueType(typeRef);
-      return isSupported(mapKeyValueType.f0) && isSupported(mapKeyValueType.f1);
+      // Thread ctx through both key and value, matching the iterable branch above. The single-arg
+      // isSupported overload builds a fresh context with synthesizeInterfaces=false and the empty
+      // custom-type registry, which would reject an interface bean used as a map key or value even
+      // though the same type is supported as a direct field or list element.
+      return isSupported(mapKeyValueType.f0, ctx) && isSupported(mapKeyValueType.f1, ctx);
     } else if (cls.isEnum()) {
       return true;
     } else {

@@ -99,6 +99,22 @@
 
   <build>
     <plugins>
+      <!--
+        Compile against the Java 11 API, not just to Java 11 bytecode. The parent sets only
+        source/target, which still resolves against the build JDK's bootclasspath, so a newer-than-11
+        API (e.g. the Java 16 record reflection API) compiles silently and then fails at runtime on
+        Java 11. release=11 makes such a leak a compile error here. Per-module: fory-core cannot use
+        release because it depends on sun.misc.Unsafe, which release excludes.
+      -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <release>11</release>
+          <source combine.self="override"/>
+          <target combine.self="override"/>
+        </configuration>
+      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-jar-plugin</artifactId>

@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fory.format.annotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Class-level row-codec schema metadata used when the codec builder enables schema evolution.
+ *
+ * <p>Live fields without a {@link ForyVersion} annotation are treated as present from the first
+ * version, so a class can adopt versioning by annotating only the fields added later.
+ *
+ * <p>{@link #removedFields()} points at a class (conventionally a nested {@code interface}) whose
+ * accessor methods describe fields that have been removed from this bean but still appear on the
+ * wire in older payloads. Each method's return type is the original Java type of the removed field;
+ * each method must carry a {@link ForyVersion} annotation with {@code until} set, since removed
+ * fields have a known end-of-life version.
+ *
+ * <p>Example:
+ *
+ * <pre>
+ * &#64;Data
+ * &#64;ForySchema(removedFields = MyBean.History.class)
+ * public class MyBean {
+ *   private String name;
+ *
+ *   interface History {
+ *     &#64;ForyVersion(until = 3)
+ *     List&lt;String&gt; tags();
+ *
+ *     &#64;ForyVersion(since = 2, until = 5)
+ *     Map&lt;String, Long&gt; counters();
+ *   }
+ * }
+ * </pre>
+ */
+@Target(ElementType.TYPE)
+@Retention(RetentionPolicy.RUNTIME)
+public @interface ForySchema {
+  /**
+   * The type whose accessor methods declare fields that used to exist on the annotated bean and
+   * have since been removed. The codec only reads that type's method signatures and annotations;
+   * it never creates an instance of it.
+   *
+   * @return the type declaring the removed fields, or {@code void.class} (the default) when the
+   *     bean has no removed fields
+   */
+  Class<?> removedFields() default void.class;
+}