From d6941a9f14f8c50dc14c4e930b7dae0363d95e66 Mon Sep 17 00:00:00 2001
From: Polina Binder <pbinder@nvidia.com>
Date: Fri, 12 Jun 2026 03:47:09 +0000
Subject: [PATCH 1/4] evo2 serve: FastAPI server + CLI (stacked on the engine)

The HTTP/CLI layer over Evo2SAE: server.py (/health /features /annotate /generate),
cli.py (serve/encode/batch/generate), launch_inference.sh, and the CPU contract tests.
Thin wrappers over the engine PR's core.py.

Signed-off-by: Polina Binder <pbinder@nvidia.com>
---
 .../recipes/evo2/scripts/launch_inference.sh  |  37 ++++
 .../recipes/evo2/src/evo2_sae/cli.py          | 197 ++++++++++++++++++
 .../recipes/evo2/src/evo2_sae/server.py       | 178 ++++++++++++++++
 .../recipes/evo2/tests/test_server.py         | 106 ++++++++++
 4 files changed, 518 insertions(+)
 create mode 100755 bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh
 create mode 100644 bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py
 create mode 100644 bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
 create mode 100644 bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/tests/test_server.py

diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh
new file mode 100755
index 0000000000..26768a4c46
--- /dev/null
+++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Launch the Evo2 SAE inference engine. One engine, four modes:
+#
+#   ./launch_inference.sh serve                       # live HTTP server on :8001 (viz backend)
+#   ./launch_inference.sh encode  --sequence ATGC...  # annotate ONE sequence -> top features
+#   ./launch_inference.sh batch   --fasta in.fa --out out.parquet   # MANY sequences -> parquet
+#   ./launch_inference.sh generate --prompt ATGC... --clamp 29244:300  # steer + generate DNA
+#
+# Steering loop: `encode` a sequence to find an active feature id, then
+# `generate --clamp ID:STRENGTH` (strength ~2-3x the feature's max_activation; repeat --clamp).
+#
+# Config via env. Required: VENV, EVO2_CKPT_DIR, SAE_CKPT_PATH. Optional (have defaults):
+# FEATURE_ANNOTATIONS, EMBEDDING_LAYER (26), DEVICE, MAX_SEQ_LEN, PORT, CUDA_VISIBLE_DEVICES.
+#
+# Requires the evo2_megatron recipe venv (provides bionemo.evo2 + megatron).
+set -euo pipefail
+
+HERE="$(cd "$(dirname "$0")" && pwd)"
+RECIPE_DIR="$(cd "$HERE/.." && pwd)"  # recipes/evo2 — so the evo2_sae package imports
+
+# Required (no hardcoded defaults — supply your own paths via env):
+VENV="${VENV:?Set VENV to the evo2_megatron recipe .venv (provides bionemo.evo2 + megatron)}"
+export EVO2_CKPT_DIR="${EVO2_CKPT_DIR:?Set EVO2_CKPT_DIR to an Evo2 MBridge checkpoint directory}"
+export SAE_CKPT_PATH="${SAE_CKPT_PATH:?Set SAE_CKPT_PATH to a trained SAE checkpoint (.pt)}"
+# Optional: feature-label parquet (empty = features are unlabeled). Layer defaults to 26.
+export FEATURE_ANNOTATIONS="${FEATURE_ANNOTATIONS:-}"
+export EMBEDDING_LAYER="${EMBEDDING_LAYER:-26}"
+
+if [[ ! -x "$VENV/bin/python" ]]; then
+  echo "ERROR: evo2_megatron venv not found at $VENV (build it with the recipe's .ci_build.sh)" >&2
+  exit 1
+fi
+
+source "$VENV/bin/activate"
+cd "$RECIPE_DIR"
+export PYTHONPATH="$RECIPE_DIR/src${PYTHONPATH:+:$PYTHONPATH}"
+exec python -m evo2_sae.cli "$@"
diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py
new file mode 100644
index 0000000000..98185dd51a
--- /dev/null
+++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py
@@ -0,0 +1,197 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-Apache2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Evo2 SAE inference CLI — one engine, four modes.
+
+    serve   : start the FastAPI server (one sequence at a time, interactive)
+    encode  : annotate ONE sequence -> top features (stdout JSON)
+    batch   : run a FASTA of MANY sequences -> parquet of per-sequence top features
+    generate: generate DNA, optionally steering SAE features (stdout JSON)
+
+They all build the same `Evo2SAE` engine; config comes from flags or env
+(EVO2_CKPT_DIR / SAE_CKPT_PATH / FEATURE_ANNOTATIONS / EMBEDDING_LAYER).
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+
+
+def _add_common(p: argparse.ArgumentParser) -> None:
+    """Register the shared inference arguments (checkpoints, layer, device) on a parser.
+
+    Defaults come from env vars (``EVO2_CKPT_DIR``, ``SAE_CKPT_PATH``, ``FEATURE_ANNOTATIONS``,
+    ``EMBEDDING_LAYER``, ``DEVICE``, ``MAX_SEQ_LEN``); pass the flags to override. No hardcoded
+    paths — the checkpoints must be supplied via flag or env.
+
+    Args:
+        p: The argparse parser (or subparser) to add the shared arguments to.
+
+    Returns:
+        None. Mutates ``p`` in place.
+    """
+    p.add_argument("--evo2-ckpt-dir", default=os.environ.get("EVO2_CKPT_DIR"))
+    p.add_argument("--sae-ckpt-path", default=os.environ.get("SAE_CKPT_PATH"))
+    p.add_argument("--feature-annotations", default=os.environ.get("FEATURE_ANNOTATIONS"))
+    p.add_argument("--layer", type=int, default=int(os.environ.get("EMBEDDING_LAYER", "26")))
+    p.add_argument("--device", default=os.environ.get("DEVICE", "cuda"))
+    p.add_argument("--max-seq-len", type=int, default=int(os.environ.get("MAX_SEQ_LEN", "8192")))
+
+
+def _engine(args):
+    """Construct an Evo2SAE engine from parsed CLI args.
+
+    Args:
+        args: Parsed argparse namespace with ``evo2_ckpt_dir``, ``sae_ckpt_path``, ``layer``,
+            ``device``, ``max_seq_len``, ``feature_annotations``.
+
+    Returns:
+        An (unloaded) ``Evo2SAE`` instance — call ``.load()`` before use.
+    """
+    from .core import Evo2SAE
+
+    return Evo2SAE(
+        evo2_ckpt_dir=args.evo2_ckpt_dir,
+        sae_ckpt_path=args.sae_ckpt_path,
+        layer=args.layer,
+        device=args.device,
+        max_seq_len=args.max_seq_len,
+        feature_annotations=args.feature_annotations,
+    )
+
+
+def _parse_clamps(clamps: list[str]) -> list[dict]:
+    """Parse repeated ``--clamp FEATURE_ID[:STRENGTH]`` args into [{feature_id, strength}].
+
+    Strength defaults to 1.0 if omitted (e.g. ``--clamp 29244:300`` or ``--clamp 29244``).
+    """
+    specs = []
+    for c in clamps:
+        fid, sep, strength = c.partition(":")
+        specs.append({"feature_id": int(fid), "strength": float(strength) if (sep and strength) else 1.0})
+    return specs
+
+
+def main():
+    """Parse args and dispatch to the serve / encode / batch / generate subcommand."""
+    ap = argparse.ArgumentParser(description="Evo2 SAE inference (serve | encode | batch | generate)")
+    sub = ap.add_subparsers(dest="cmd", required=True)
+
+    ps = sub.add_parser("serve", help="start the FastAPI inference server")
+    _add_common(ps)
+    ps.add_argument("--host", default="0.0.0.0")
+    ps.add_argument("--port", type=int, default=int(os.environ.get("PORT", "8001")))
+
+    pe = sub.add_parser("encode", help="annotate ONE sequence -> top features (JSON)")
+    _add_common(pe)
+    pe.add_argument("--sequence", required=True)
+    pe.add_argument("--organism", default="None (raw DNA)")
+    pe.add_argument("--top-k", type=int, default=8)
+
+    pb = sub.add_parser("batch", help="MANY sequences (FASTA) -> parquet of per-sequence top features")
+    _add_common(pb)
+    pb.add_argument("--fasta", required=True)
+    pb.add_argument("--out", required=True)
+    pb.add_argument("--top-k", type=int, default=16)
+    pb.add_argument("--batch-size", type=int, default=8)
+
+    pg = sub.add_parser("generate", help="generate DNA, optionally steering SAE features")
+    _add_common(pg)
+    pg.add_argument("--prompt", default="", help="DNA to seed; steering applies to the continuation")
+    pg.add_argument("--organism", default="None (raw DNA)")
+    pg.add_argument(
+        "--clamp",
+        action="append",
+        default=[],
+        metavar="FEATURE_ID[:STRENGTH]",
+        help="clamp a feature on the continuation; repeatable (e.g. --clamp 29244:300). "
+        "Find feature ids with `encode`.",
+    )
+    pg.add_argument("--n-tokens", type=int, default=120)
+    pg.add_argument("--temperature", type=float, default=1.0)
+    pg.add_argument("--top-k", type=int, default=0)
+    pg.add_argument("--compare-baseline", action="store_true", help="also generate unsteered, for comparison")
+
+    args = ap.parse_args()
+
+    if args.cmd == "serve":
+        import uvicorn
+
+        from .server import build_app
+
+        uvicorn.run(build_app(_engine(args)), host=args.host, port=args.port, log_level="info")
+        return
+
+    from .core import clean_dna
+
+    eng = _engine(args).load()
+
+    if args.cmd == "encode":
+        tag = eng.resolve_tag(args.organism, None) or ""
+        dna = clean_dna(args.sequence)
+        codes = eng.encode(tag + dna)
+        tag_len = len(tag) if codes.shape[0] >= len(tag) else 0
+        feats = eng.top_features(codes, tag_len=tag_len, k=args.top_k)
+        print(
+            json.dumps(
+                {"sequence": dna, "organism": args.organism, "bases": len(dna), "top_features": feats}, indent=2
+            )
+        )
+
+    elif args.cmd == "batch":
+        import pandas as pd
+
+        from .fasta import read_fasta
+
+        ids, seqs = [], []
+        for sid, seq in read_fasta(args.fasta):
+            ids.append(sid)
+            seqs.append(seq)
+        print(f"[batch] {len(seqs)} sequences from {args.fasta}; encoding (batch_size={args.batch_size})…")
+        codes_list = eng.encode_batch(seqs, batch_size=args.batch_size)
+        rows = []
+        for sid, codes in zip(ids, codes_list):
+            for rank, ft in enumerate(eng.top_features(codes, k=args.top_k)):
+                rows.append({"sequence_id": sid, "bp": int(codes.shape[0]), "rank": rank, **ft})
+        df = pd.DataFrame(rows)
+        df.to_parquet(args.out, index=False)
+        print(f"[batch] wrote {len(df)} rows for {len(seqs)} sequences -> {args.out}")
+
+    elif args.cmd == "generate":
+        out = eng.generate(
+            prompt=args.prompt,
+            organism=args.organism,
+            features=_parse_clamps(args.clamp),
+            n_tokens=args.n_tokens,
+            temperature=args.temperature,
+            top_k=args.top_k,
+            compare_baseline=args.compare_baseline,
+        )
+        result = {
+            "prompt": out["prompt"],
+            "organism": out["organism"],
+            "steered": out["steered"],
+            "features": out["features"],
+            "sequence": out["generation"]["sequence"],
+        }
+        if out.get("baseline"):
+            result["baseline_sequence"] = out["baseline"]["sequence"]
+        print(json.dumps(result, indent=2))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
new file mode 100644
index 0000000000..bb7e3b391d
--- /dev/null
+++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
@@ -0,0 +1,178 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-Apache2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""FastAPI server over the Evo2SAE engine — the live backend the viz talks to.
+
+Endpoints: /health, /features, /annotate (per-base activations for a pasted
+sequence), /generate (autoregressive generation + optional SAE-feature clamp).
+This is a thin layer; all model work lives in `core.Evo2SAE`.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from contextlib import asynccontextmanager
+from typing import Optional
+
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+
+from .core import Evo2SAE, clean_dna
+
+
+logger = logging.getLogger("evo2_sae_infer.server")
+
+
+class AnnotateRequest(BaseModel):
+    """Request body for /annotate (top-k feature scan or an explicit feature pick)."""
+
+    sequence: str
+    organism: str = "None (raw DNA)"
+    tag: Optional[str] = None
+    mode: str = "topk"  # "topk" | "pick"
+    k: int = 8
+    feature_ids: Optional[list[int]] = None
+    feature_id: Optional[int] = None
+
+
+class FeatureClamp(BaseModel):
+    """A single SAE-feature steering clamp (feature id + target strength)."""
+
+    feature_id: int
+    strength: float = 1.0
+
+
+class GenerateRequest(BaseModel):
+    """Request body for /generate (autoregressive generation + optional SAE-feature clamps)."""
+
+    prompt: str = ""
+    organism: str = "None (raw DNA)"
+    tag: Optional[str] = None
+    features: list[FeatureClamp] = []
+    n_tokens: int = 120
+    temperature: float = 1.0
+    top_k: int = 0
+    compare_baseline: bool = False
+
+
+def build_app(engine: Evo2SAE) -> FastAPI:
+    """Build the FastAPI app; the engine is loaded once in the lifespan handler."""
+
+    @asynccontextmanager
+    async def lifespan(app: FastAPI):
+        try:
+            engine.load()
+            logger.info("engine ready")
+        except Exception:
+            logger.exception("engine startup failed — /health stays not-ready")
+        yield
+
+    app = FastAPI(title="Evo2 SAE inference", lifespan=lifespan)
+    allowed_origins = os.getenv("CORS_ORIGINS", "*").split(",")  # comma-separated; "*" by default (local backend)
+    app.add_middleware(CORSMiddleware, allow_origins=allowed_origins, allow_methods=["*"], allow_headers=["*"])
+
+    @app.get("/health")
+    def health():
+        return {
+            "ready": bool(engine.ready),
+            "layer": engine.layer,
+            "n_features": engine.n_features,
+            "n_labels": len(engine.labels),
+            "sae_path": engine.sae_ckpt_path,
+            "organisms": list(engine.organism_tags.keys()),
+            "organism_tags": engine.organism_tags,
+            "device": engine.device,
+        }
+
+    @app.get("/features")
+    def features():
+        if not engine.ready:
+            raise HTTPException(503, "Backend not ready")
+        rows = [
+            {"id": int(f), "label": lab, "natural_peak": engine.peaks.get(int(f))} for f, lab in engine.labels.items()
+        ]
+        rows.sort(key=lambda r: r["id"])
+        return rows
+
+    @app.post("/annotate")
+    def annotate(req: AnnotateRequest):
+        if not engine.ready:
+            raise HTTPException(503, "Backend not ready")
+        dna = clean_dna(req.sequence)
+        if not dna:
+            raise HTTPException(400, "No valid nucleotides in sequence")
+        tag = engine.resolve_tag(req.organism, req.tag)
+        if tag is None:
+            raise HTTPException(400, f"Unknown organism '{req.organism}' and no custom tag")
+        if req.mode not in ("pick", "topk"):  # reject a bad mode before running the model
+            raise HTTPException(400, f"Invalid mode {req.mode!r}: must be 'pick' or 'topk'")
+        full = tag + dna
+        tag_len = len(tag)
+        codes = engine.encode(full)  # [S, n_features], lock held inside
+        if codes.shape[0] < tag_len:
+            tag_len = 0
+        if req.mode == "pick":
+            ids = req.feature_ids or ([req.feature_id] if req.feature_id is not None else [])
+            if not ids:
+                raise HTTPException(400, "mode='pick' requires feature_ids")
+            chosen = [int(i) for i in ids]
+            if not all(0 <= fid < codes.shape[1] for fid in chosen):  # negative ids would silently wrap around
+                raise HTTPException(400, f"feature_ids must be in [0, {codes.shape[1]})")
+        else:
+            k = max(1, min(int(req.k), 64))
+            chosen = [ft["feature_id"] for ft in engine.top_features(codes, tag_len=tag_len, k=k)]
+        # Peak is taken past the tag; when no tokens follow the tag it falls back to every token.
+        scored = codes[tag_len:] if codes.shape[0] > tag_len else codes
+        feats = [
+            {
+                "feature_id": fid,
+                "label": engine.labels.get(fid),
+                "max_activation": float(scored[:, fid].max().item()),
+                "activations": [round(float(v), 4) for v in codes[:, fid].tolist()],
+            }
+            for fid in chosen
+        ]
+        return {
+            "sequence": dna,
+            "organism": req.organism,
+            "tag": tag,
+            "tag_len": tag_len,
+            "bases": list(full),
+            "n_tokens": codes.shape[0],
+            "layer": engine.layer,
+            "features": feats,
+        }
+
+    @app.post("/generate")
+    def generate(req: GenerateRequest):
+        if not engine.ready:
+            raise HTTPException(503, "Backend not ready")
+        try:
+            return engine.generate(
+                prompt=req.prompt,
+                organism=req.organism,
+                tag=req.tag,
+                features=[f.model_dump() for f in req.features],
+                n_tokens=req.n_tokens,
+                temperature=req.temperature,
+                top_k=req.top_k,
+                compare_baseline=req.compare_baseline,
+            )
+        except ValueError as e:  # a ValueError from the engine is reported to the client as a 400
+            raise HTTPException(400, str(e)) from e
+
+    return app
diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/tests/test_server.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/tests/test_server.py
new file mode 100644
index 0000000000..82b5b0726b
--- /dev/null
+++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/tests/test_server.py
@@ -0,0 +1,106 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-Apache2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Server contract tests — the API the feature-explorer viz consumes.
+
+A mocked engine (no model, CPU-only) drives the FastAPI app so these run in CI and lock the
+response shapes + error codes the dashboard depends on: /health, /features, /annotate (per-base
+activations), /generate. Real model inference is covered by test_steering.py.
+"""
+
+import pytest
+import torch
+from evo2_sae.server import build_app
+from fastapi.testclient import TestClient
+
+
+class FakeEngine:
+    """Minimal stand-in for Evo2SAE exposing only what the server endpoints touch."""
+
+    def __init__(self):
+        self.ready = True
+        self.layer = 19
+        self.n_features = 4
+        self.labels = {0: "feat0", 1: "feat1"}
+        self.peaks = {0: 0.5}
+        self.organism_tags = {"None (raw DNA)": "", "Human": "|tag|"}
+        self.device = "cpu"
+        self.sae_ckpt_path = "fake.pt"
+
+    def load(self):
+        self.ready = True
+
+    def resolve_tag(self, organism, tag):
+        return tag if tag is not None else self.organism_tags.get(organism)
+
+    def encode(self, full):
+        codes = torch.zeros(len(full), self.n_features)
+        codes[:, 0] = 1.0  # feature 0 fires everywhere
+        return codes
+
+    def top_features(self, codes, tag_len=0, k=8):
+        return [{"feature_id": 0, "label": self.labels.get(0), "max_activation": 1.0}]
+
+    def generate(self, **kw):
+        if not kw.get("prompt") and kw.get("organism") == "None (raw DNA)" and not kw.get("tag"):
+            raise ValueError("need a seed")
+        return {
+            "generation": {"sequence": "ACGT", "activations": {0: [1.0, 1.0, 1.0, 1.0]}},
+            "baseline": None,
+            "features": [],
+            "steered": False,
+        }
+
+
+@pytest.fixture
+def client():
+    with TestClient(build_app(FakeEngine())) as c:
+        yield c
+
+
+def test_health(client):
+    b = client.get("/health").json()
+    assert b["ready"] is True and b["layer"] == 19
+    assert "None (raw DNA)" in b["organisms"]
+
+
+def test_features(client):
+    rows = client.get("/features").json()
+    assert {"id", "label", "natural_peak"} <= set(rows[0])
+
+
+def test_annotate_returns_per_base_activations(client):
+    b = client.post("/annotate", json={"sequence": "ACGTACGT", "organism": "None (raw DNA)"}).json()
+    assert {"sequence", "features", "bases", "tag_len", "layer", "n_tokens"} <= set(b)
+    assert b["features"][0]["activations"]  # the per-base track the viz plots
+
+
+def test_annotate_rejects_non_dna(client):
+    assert client.post("/annotate", json={"sequence": "ZZZZ"}).status_code == 400
+
+
+def test_generate_returns_sequence(client):
+    b = client.post("/generate", json={"prompt": "ACGT", "organism": "None (raw DNA)"}).json()
+    assert b["generation"]["sequence"]
+
+
+def test_endpoints_503_until_ready():
+    eng = FakeEngine()
+    eng.ready = False
+    eng.load = lambda: None  # startup leaves it not-ready
+    with TestClient(build_app(eng)) as c:
+        assert c.get("/features").status_code == 503
+        assert c.post("/annotate", json={"sequence": "ACGT"}).status_code == 503
+        assert c.post("/generate", json={"prompt": "ACGT", "organism": "None (raw DNA)"}).status_code == 503

From 76ef59c8c693ade82d8ffdccf0589a9477eecd72 Mon Sep 17 00:00:00 2001
From: Polina Binder <pbinder@nvidia.com>
Date: Fri, 12 Jun 2026 04:22:54 +0000
Subject: [PATCH 2/4] evo2 serve: DRY the 'backend not ready' 503 guard into
 _require_ready()

/features, /annotate, /generate each repeated the readiness check + message; one
build_app-local _require_ready() helper. test_server (503-until-ready) still green.

Signed-off-by: Polina Binder <pbinder@nvidia.com>
---
 .../recipes/evo2/src/evo2_sae/server.py              | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
index bb7e3b391d..87973340d4 100644
--- a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
+++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
@@ -85,6 +85,9 @@ async def lifespan(app: FastAPI):
     allowed_origins = os.getenv("CORS_ORIGINS", "*").split(",")  # comma-separated; "*" by default (local backend)
     app.add_middleware(CORSMiddleware, allow_origins=allowed_origins, allow_methods=["*"], allow_headers=["*"])
 
+    def _require_ready():
+        _require_ready()
+
     @app.get("/health")
     def health():
         return {
@@ -100,8 +103,7 @@ def health():
 
     @app.get("/features")
     def features():
-        if not engine.ready:
-            raise HTTPException(503, "Backend not ready")
+        _require_ready()
         rows = [
             {"id": int(f), "label": lab, "natural_peak": engine.peaks.get(int(f))} for f, lab in engine.labels.items()
         ]
@@ -110,8 +112,7 @@ def features():
 
     @app.post("/annotate")
     def annotate(req: AnnotateRequest):
-        if not engine.ready:
-            raise HTTPException(503, "Backend not ready")
+        _require_ready()
         dna = clean_dna(req.sequence)
         if not dna:
             raise HTTPException(400, "No valid nucleotides in sequence")
@@ -159,8 +160,7 @@ def annotate(req: AnnotateRequest):
 
     @app.post("/generate")
     def generate(req: GenerateRequest):
-        if not engine.ready:
-            raise HTTPException(503, "Backend not ready")
+        _require_ready()
         try:
             return engine.generate(
                 prompt=req.prompt,

From 9fd49ed41dbc4d0b9ca1b3b9524baf750f907f72 Mon Sep 17 00:00:00 2001
From: Polina Binder <pbinder@nvidia.com>
Date: Fri, 12 Jun 2026 04:23:47 +0000
Subject: [PATCH 3/4] fix: _require_ready recursion (replace_all clobbered its
 own body)

The previous commit's replace_all rewrote _require_ready's body into a self-call ->
RecursionError. Restore the real readiness check. test_server green again.

Signed-off-by: Polina Binder <pbinder@nvidia.com>
---
 .../sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
index 87973340d4..cefe5fdc1e 100644
--- a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
+++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py
@@ -86,7 +86,8 @@ async def lifespan(app: FastAPI):
     app.add_middleware(CORSMiddleware, allow_origins=allowed_origins, allow_methods=["*"], allow_headers=["*"])
 
     def _require_ready():
-        _require_ready()
+        if not engine.ready:
+            raise HTTPException(503, "Backend not ready")
 
     @app.get("/health")
     def health():

From 6e1b7d7e707401d6d850913750049ee9aa21338c Mon Sep 17 00:00:00 2001
From: Polina Binder <pbinder@nvidia.com>
Date: Fri, 12 Jun 2026 05:37:55 +0000
Subject: [PATCH 4/4] style(core): ruff-format blank line after in-function
 import (fix pre-commit CI)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Polina Binder <pbinder@nvidia.com>
---
 .../sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py        | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py
index 00035d9b7f..6cc72cc80b 100644
--- a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py
+++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py
@@ -353,6 +353,7 @@ def generate(
             comp = self._ensure_engine()
             hook_layer = unwrap_model(comp.model).decoder.layers[self.layer]
             from sae.steering import clamp_hook
+
             feat_meta = [{"id": fid, "label": self.labels.get(fid), "strength": s} for fid, s in clamps.items()]
 
             def _run(steer: bool) -> str: