From d6941a9f14f8c50dc14c4e930b7dae0363d95e66 Mon Sep 17 00:00:00 2001 From: Polina Binder Date: Fri, 12 Jun 2026 03:47:09 +0000 Subject: [PATCH 1/4] evo2 serve: FastAPI server + CLI (stacked on the engine) The HTTP/CLI layer over Evo2SAE: server.py (/health /features /annotate /generate), cli.py (serve/encode/batch/generate), launch_inference.sh, and the CPU contract tests. Thin wrappers over the engine PR's core.py. Signed-off-by: Polina Binder --- .../recipes/evo2/scripts/launch_inference.sh | 37 ++++ .../recipes/evo2/src/evo2_sae/cli.py | 197 ++++++++++++++++++ .../recipes/evo2/src/evo2_sae/server.py | 178 ++++++++++++++++ .../recipes/evo2/tests/test_server.py | 106 ++++++++++ 4 files changed, 518 insertions(+) create mode 100755 bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh create mode 100644 bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py create mode 100644 bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py create mode 100644 bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/tests/test_server.py diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh new file mode 100755 index 0000000000..26768a4c46 --- /dev/null +++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Launch the Evo2 SAE inference engine. One engine, four modes: +# +# ./launch_inference.sh serve # live HTTP server on :8001 (viz backend) +# ./launch_inference.sh encode --sequence ATGC... # annotate ONE sequence -> top features +# ./launch_inference.sh batch --fasta in.fa --out out.parquet # MANY sequences -> parquet +# ./launch_inference.sh generate --prompt ATGC... 
--clamp 29244:300 # steer + generate DNA +# +# Steering loop: `encode` a sequence to find an active feature id, then +# `generate --clamp ID:STRENGTH` (strength ~2-3x the feature's max_activation; repeat --clamp). +# +# Config via env. Required: VENV, EVO2_CKPT_DIR, SAE_CKPT_PATH. Optional (have defaults): +# FEATURE_ANNOTATIONS, EMBEDDING_LAYER (26), DEVICE, PORT, CUDA_VISIBLE_DEVICES. +# +# Requires the evo2_megatron recipe venv (provides bionemo.evo2 + megatron). +set -euo pipefail + +HERE="$(cd "$(dirname "$0")" && pwd)" +RECIPE_DIR="$(cd "$HERE/.." && pwd)" # recipes/evo2 — so the evo2_sae package imports + +# Required (no hardcoded defaults — supply your own paths via env): +VENV="${VENV:?Set VENV to the evo2_megatron recipe .venv (provides bionemo.evo2 + megatron)}" +export EVO2_CKPT_DIR="${EVO2_CKPT_DIR:?Set EVO2_CKPT_DIR to an Evo2 MBridge checkpoint directory}" +export SAE_CKPT_PATH="${SAE_CKPT_PATH:?Set SAE_CKPT_PATH to a trained SAE checkpoint (.pt)}" +# Optional: feature-label parquet (empty = features are unlabeled). Layer defaults to 26. +export FEATURE_ANNOTATIONS="${FEATURE_ANNOTATIONS:-}" +export EMBEDDING_LAYER="${EMBEDDING_LAYER:-26}" + +if [[ ! -x "$VENV/bin/python" ]]; then + echo "ERROR: evo2_megatron venv not found at $VENV (build it with the recipe's .ci_build.sh)" >&2 + exit 1 +fi + +source "$VENV/bin/activate" +cd "$RECIPE_DIR" +export PYTHONPATH="$RECIPE_DIR/src${PYTHONPATH:+:$PYTHONPATH}" +exec python -m evo2_sae.cli "$@" diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py new file mode 100644 index 0000000000..98185dd51a --- /dev/null +++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py @@ -0,0 +1,197 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-Apache2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evo2 SAE inference CLI — one engine, four modes. + + serve : start the FastAPI server (one sequence at a time, interactive) + encode : annotate ONE sequence -> top features (stdout JSON) + batch : run a FASTA of MANY sequences -> parquet of per-sequence top features + generate: generate DNA, optionally steering SAE features (stdout JSON) + +They all build the same `Evo2SAE` engine; config comes from flags or env +(EVO2_CKPT_DIR / SAE_CKPT_PATH / FEATURE_ANNOTATIONS / EMBEDDING_LAYER). +""" + +from __future__ import annotations + +import argparse +import json +import os + + +def _add_common(p: argparse.ArgumentParser) -> None: + """Register the shared inference arguments (checkpoints, layer, device) on a parser. + + Defaults come from env vars (``EVO2_CKPT_DIR``, ``SAE_CKPT_PATH``, ``FEATURE_ANNOTATIONS``, + ``EMBEDDING_LAYER``, ``DEVICE``, ``MAX_SEQ_LEN``); pass the flags to override. No hardcoded + paths — the checkpoints must be supplied via flag or env. + + Args: + p: The argparse parser (or subparser) to add the shared arguments to. + + Returns: + None. Mutates ``p`` in place. 
+ """ + p.add_argument("--evo2-ckpt-dir", default=os.environ.get("EVO2_CKPT_DIR")) + p.add_argument("--sae-ckpt-path", default=os.environ.get("SAE_CKPT_PATH")) + p.add_argument("--feature-annotations", default=os.environ.get("FEATURE_ANNOTATIONS")) + p.add_argument("--layer", type=int, default=int(os.environ.get("EMBEDDING_LAYER", "26"))) + p.add_argument("--device", default=os.environ.get("DEVICE", "cuda")) + p.add_argument("--max-seq-len", type=int, default=int(os.environ.get("MAX_SEQ_LEN", "8192"))) + + +def _engine(args): + """Construct an Evo2SAE engine from parsed CLI args. + + Args: + args: Parsed argparse namespace with ``evo2_ckpt_dir``, ``sae_ckpt_path``, ``layer``, + ``device``, ``max_seq_len``, ``feature_annotations``. + + Returns: + An (unloaded) ``Evo2SAE`` instance — call ``.load()`` before use. + """ + from .core import Evo2SAE + + return Evo2SAE( + evo2_ckpt_dir=args.evo2_ckpt_dir, + sae_ckpt_path=args.sae_ckpt_path, + layer=args.layer, + device=args.device, + max_seq_len=args.max_seq_len, + feature_annotations=args.feature_annotations, + ) + + +def _parse_clamps(clamps: list[str]) -> list[dict]: + """Parse repeated ``--clamp FEATURE_ID[:STRENGTH]`` args into [{feature_id, strength}]. + + Strength defaults to 1.0 if omitted (e.g. ``--clamp 29244:300`` or ``--clamp 29244``). 
+ """ + specs = [] + for c in clamps: + fid, sep, strength = c.partition(":") + specs.append({"feature_id": int(fid), "strength": float(strength) if (sep and strength) else 1.0}) + return specs + + +def main(): + """Parse args and dispatch to the serve / encode / batch / generate subcommand.""" + ap = argparse.ArgumentParser(description="Evo2 SAE inference (serve | encode | batch | generate)") + sub = ap.add_subparsers(dest="cmd", required=True) + + ps = sub.add_parser("serve", help="start the FastAPI inference server") + _add_common(ps) + ps.add_argument("--host", default="0.0.0.0") + ps.add_argument("--port", type=int, default=int(os.environ.get("PORT", "8001"))) + + pe = sub.add_parser("encode", help="annotate ONE sequence -> top features (JSON)") + _add_common(pe) + pe.add_argument("--sequence", required=True) + pe.add_argument("--organism", default="None (raw DNA)") + pe.add_argument("--top-k", type=int, default=8) + + pb = sub.add_parser("batch", help="MANY sequences (FASTA) -> parquet of per-sequence top features") + _add_common(pb) + pb.add_argument("--fasta", required=True) + pb.add_argument("--out", required=True) + pb.add_argument("--top-k", type=int, default=16) + pb.add_argument("--batch-size", type=int, default=8) + + pg = sub.add_parser("generate", help="generate DNA, optionally steering SAE features") + _add_common(pg) + pg.add_argument("--prompt", default="", help="DNA to seed; steering applies to the continuation") + pg.add_argument("--organism", default="None (raw DNA)") + pg.add_argument( + "--clamp", + action="append", + default=[], + metavar="FEATURE_ID[:STRENGTH]", + help="clamp a feature on the continuation; repeatable (e.g. --clamp 29244:300).
" + "Find feature ids with `encode`.", + ) + pg.add_argument("--n-tokens", type=int, default=120) + pg.add_argument("--temperature", type=float, default=1.0) + pg.add_argument("--top-k", type=int, default=0) + pg.add_argument("--compare-baseline", action="store_true", help="also generate unsteered, for comparison") + + args = ap.parse_args() + + if args.cmd == "serve": + import uvicorn + + from .server import build_app + + uvicorn.run(build_app(_engine(args)), host=args.host, port=args.port, log_level="info") + return + + from .core import clean_dna + + eng = _engine(args).load() + + if args.cmd == "encode": + tag = eng.resolve_tag(args.organism, None) or "" + dna = clean_dna(args.sequence) + codes = eng.encode(tag + dna) + tag_len = len(tag) if codes.shape[0] >= len(tag) else 0 + feats = eng.top_features(codes, tag_len=tag_len, k=args.top_k) + print( + json.dumps( + {"sequence": dna, "organism": args.organism, "bases": len(dna), "top_features": feats}, indent=2 + ) + ) + + elif args.cmd == "batch": + import pandas as pd + + from .fasta import read_fasta + + ids, seqs = [], [] + for sid, seq in read_fasta(args.fasta): + ids.append(sid) + seqs.append(seq) + print(f"[batch] {len(seqs)} sequences from {args.fasta}; encoding (batch_size={args.batch_size})…") + codes_list = eng.encode_batch(seqs, batch_size=args.batch_size) + rows = [] + for sid, codes in zip(ids, codes_list): + for rank, ft in enumerate(eng.top_features(codes, k=args.top_k)): + rows.append({"sequence_id": sid, "bp": int(codes.shape[0]), "rank": rank, **ft}) + df = pd.DataFrame(rows) + df.to_parquet(args.out, index=False) + print(f"[batch] wrote {len(df)} rows for {len(seqs)} sequences -> {args.out}") + + elif args.cmd == "generate": + out = eng.generate( + prompt=args.prompt, + organism=args.organism, + features=_parse_clamps(args.clamp), + n_tokens=args.n_tokens, + temperature=args.temperature, + top_k=args.top_k, + compare_baseline=args.compare_baseline, + ) + result = { + "prompt": out["prompt"], + 
"organism": out["organism"], + "steered": out["steered"], + "features": out["features"], + "sequence": out["generation"]["sequence"], + } + if out.get("baseline"): + result["baseline_sequence"] = out["baseline"]["sequence"] + print(json.dumps(result, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py new file mode 100644 index 0000000000..bb7e3b391d --- /dev/null +++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py @@ -0,0 +1,178 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-Apache2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""FastAPI server over the Evo2SAE engine — the live backend the viz talks to. + +Endpoints: /health, /features, /annotate (per-base activations for a pasted +sequence), /generate (autoregressive generation + optional SAE-feature clamp). +This is a thin layer; all model work lives in `core.Evo2SAE`. 
+""" + +from __future__ import annotations + +import logging +import os +from contextlib import asynccontextmanager +from typing import Optional + +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel + +from .core import Evo2SAE, clean_dna + + +logger = logging.getLogger("evo2_sae_infer.server") + + +class AnnotateRequest(BaseModel): + """Request body for /annotate (top-k feature scan or an explicit feature pick).""" + + sequence: str + organism: str = "None (raw DNA)" + tag: Optional[str] = None + mode: str = "topk" # "topk" | "pick" + k: int = 8 + feature_ids: Optional[list[int]] = None + feature_id: Optional[int] = None + + +class FeatureClamp(BaseModel): + """A single SAE-feature steering clamp (feature id + target strength).""" + + feature_id: int + strength: float = 1.0 + + +class GenerateRequest(BaseModel): + """Request body for /generate (autoregressive generation + optional SAE-feature clamps).""" + + prompt: str = "" + organism: str = "None (raw DNA)" + tag: Optional[str] = None + features: list[FeatureClamp] = [] + n_tokens: int = 120 + temperature: float = 1.0 + top_k: int = 0 + compare_baseline: bool = False + + +def build_app(engine: Evo2SAE) -> FastAPI: + """Build the FastAPI app; the engine is loaded once in the lifespan handler.""" + + @asynccontextmanager + async def lifespan(app: FastAPI): + try: + engine.load() + logger.info("engine ready") + except Exception: + logger.exception("engine startup failed — /health stays not-ready") + yield + + app = FastAPI(title="Evo2 SAE inference", lifespan=lifespan) + allowed_origins = os.getenv("CORS_ORIGINS", "*").split(",") # comma-separated; "*" by default (local backend) + app.add_middleware(CORSMiddleware, allow_origins=allowed_origins, allow_methods=["*"], allow_headers=["*"]) + + @app.get("/health") + def health(): + return { + "ready": bool(engine.ready), + "layer": engine.layer, + "n_features": engine.n_features, + "n_labels": 
len(engine.labels), + "sae_path": engine.sae_ckpt_path, + "organisms": list(engine.organism_tags.keys()), + "organism_tags": engine.organism_tags, + "device": engine.device, + } + + @app.get("/features") + def features(): + if not engine.ready: + raise HTTPException(503, "Backend not ready") + rows = [ + {"id": int(f), "label": lab, "natural_peak": engine.peaks.get(int(f))} for f, lab in engine.labels.items() + ] + rows.sort(key=lambda r: r["id"]) + return rows + + @app.post("/annotate") + def annotate(req: AnnotateRequest): + if not engine.ready: + raise HTTPException(503, "Backend not ready") + dna = clean_dna(req.sequence) + if not dna: + raise HTTPException(400, "No valid nucleotides in sequence") + tag = engine.resolve_tag(req.organism, req.tag) + if tag is None: + raise HTTPException(400, f"Unknown organism '{req.organism}' and no custom tag") + full = tag + dna + tag_len = len(tag) + codes = engine.encode(full) # [S, n_features], lock held inside + if codes.shape[0] < tag_len: + tag_len = 0 + if req.mode not in ("pick", "topk"): + raise HTTPException(400, f"Invalid mode {req.mode!r}: must be 'pick' or 'topk'") + if req.mode == "pick": + ids = req.feature_ids or ([req.feature_id] if req.feature_id is not None else []) + if not ids: + raise HTTPException(400, "mode='pick' requires feature_ids") + chosen = [int(i) for i in ids] + else: + k = max(1, min(int(req.k), 64)) + chosen = [ft["feature_id"] for ft in engine.top_features(codes, tag_len=tag_len, k=k)] + feats = [] + for fid in chosen: + col = codes[:, fid] + feats.append( + { + "feature_id": fid, + "label": engine.labels.get(fid), + "max_activation": float(col[tag_len:].max().item()) + if codes.shape[0] > tag_len + else float(col.max().item()), + "activations": [round(float(v), 4) for v in col.tolist()], + } + ) + return { + "sequence": dna, + "organism": req.organism, + "tag": tag, + "tag_len": tag_len, + "bases": list(full), + "n_tokens": codes.shape[0], + "layer": engine.layer, + "features": feats, + } 
+ + @app.post("/generate") + def generate(req: GenerateRequest): + if not engine.ready: + raise HTTPException(503, "Backend not ready") + try: + return engine.generate( + prompt=req.prompt, + organism=req.organism, + tag=req.tag, + features=[f.model_dump() for f in req.features], + n_tokens=req.n_tokens, + temperature=req.temperature, + top_k=req.top_k, + compare_baseline=req.compare_baseline, + ) + except ValueError as e: + raise HTTPException(400, str(e)) + + return app diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/tests/test_server.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/tests/test_server.py new file mode 100644 index 0000000000..82b5b0726b --- /dev/null +++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/tests/test_server.py @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-Apache2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Server contract tests — the API the feature-explorer viz consumes. + +A mocked engine (no model, CPU-only) drives the FastAPI app so these run in CI and lock the +response shapes + error codes the dashboard depends on: /health, /features, /annotate (per-base +activations), /generate. Real model inference is covered by test_steering.py. 
+""" + +import pytest +import torch +from evo2_sae.server import build_app +from fastapi.testclient import TestClient + + +class FakeEngine: + """Minimal stand-in for Evo2SAE exposing only what the server endpoints touch.""" + + def __init__(self): + self.ready = True + self.layer = 19 + self.n_features = 4 + self.labels = {0: "feat0", 1: "feat1"} + self.peaks = {0: 0.5} + self.organism_tags = {"None (raw DNA)": "", "Human": "|tag|"} + self.device = "cpu" + self.sae_ckpt_path = "fake.pt" + + def load(self): + self.ready = True + + def resolve_tag(self, organism, tag): + return tag if tag is not None else self.organism_tags.get(organism) + + def encode(self, full): + codes = torch.zeros(len(full), self.n_features) + codes[:, 0] = 1.0 # feature 0 fires everywhere + return codes + + def top_features(self, codes, tag_len=0, k=8): + return [{"feature_id": 0, "label": self.labels.get(0), "max_activation": 1.0}] + + def generate(self, **kw): + if not kw.get("prompt") and kw.get("organism") == "None (raw DNA)" and not kw.get("tag"): + raise ValueError("need a seed") + return { + "generation": {"sequence": "ACGT", "activations": {0: [1.0, 1.0, 1.0, 1.0]}}, + "baseline": None, + "features": [], + "steered": False, + } + + +@pytest.fixture +def client(): + with TestClient(build_app(FakeEngine())) as c: + yield c + + +def test_health(client): + b = client.get("/health").json() + assert b["ready"] is True and b["layer"] == 19 + assert "None (raw DNA)" in b["organisms"] + + +def test_features(client): + rows = client.get("/features").json() + assert {"id", "label", "natural_peak"} <= set(rows[0]) + + +def test_annotate_returns_per_base_activations(client): + b = client.post("/annotate", json={"sequence": "ACGTACGT", "organism": "None (raw DNA)"}).json() + assert {"sequence", "features", "bases", "tag_len", "layer", "n_tokens"} <= set(b) + assert b["features"][0]["activations"] # the per-base track the viz plots + + +def test_annotate_rejects_non_dna(client): + assert 
client.post("/annotate", json={"sequence": "ZZZZ"}).status_code == 400 + + +def test_generate_returns_sequence(client): + b = client.post("/generate", json={"prompt": "ACGT", "organism": "None (raw DNA)"}).json() + assert b["generation"]["sequence"] + + +def test_endpoints_503_until_ready(): + eng = FakeEngine() + eng.ready = False + eng.load = lambda: None # startup leaves it not-ready + with TestClient(build_app(eng)) as c: + assert c.get("/features").status_code == 503 + assert c.post("/annotate", json={"sequence": "ACGT"}).status_code == 503 + assert c.post("/generate", json={"prompt": "ACGT", "organism": "None (raw DNA)"}).status_code == 503 From 76ef59c8c693ade82d8ffdccf0589a9477eecd72 Mon Sep 17 00:00:00 2001 From: Polina Binder Date: Fri, 12 Jun 2026 04:22:54 +0000 Subject: [PATCH 2/4] evo2 serve: DRY the 'backend not ready' 503 guard into _require_ready() /features, /annotate, /generate each repeated the readiness check + message; one build_app-local _require_ready() helper. test_server (503-until-ready) still green. 
Signed-off-by: Polina Binder --- .../recipes/evo2/src/evo2_sae/server.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py index bb7e3b391d..87973340d4 100644 --- a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py +++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py @@ -85,6 +85,9 @@ async def lifespan(app: FastAPI): allowed_origins = os.getenv("CORS_ORIGINS", "*").split(",") # comma-separated; "*" by default (local backend) app.add_middleware(CORSMiddleware, allow_origins=allowed_origins, allow_methods=["*"], allow_headers=["*"]) + def _require_ready(): + _require_ready() + @app.get("/health") def health(): return { @@ -100,8 +103,7 @@ def health(): @app.get("/features") def features(): - if not engine.ready: - raise HTTPException(503, "Backend not ready") + _require_ready() rows = [ {"id": int(f), "label": lab, "natural_peak": engine.peaks.get(int(f))} for f, lab in engine.labels.items() ] @@ -110,8 +112,7 @@ def features(): @app.post("/annotate") def annotate(req: AnnotateRequest): - if not engine.ready: - raise HTTPException(503, "Backend not ready") + _require_ready() dna = clean_dna(req.sequence) if not dna: raise HTTPException(400, "No valid nucleotides in sequence") @@ -159,8 +160,7 @@ def annotate(req: AnnotateRequest): @app.post("/generate") def generate(req: GenerateRequest): - if not engine.ready: - raise HTTPException(503, "Backend not ready") + _require_ready() try: return engine.generate( prompt=req.prompt, From 9fd49ed41dbc4d0b9ca1b3b9524baf750f907f72 Mon Sep 17 00:00:00 2001 From: Polina Binder Date: Fri, 12 Jun 2026 04:23:47 +0000 Subject: [PATCH 3/4] fix: _require_ready recursion (replace_all clobbered its own body) The previous commit's replace_all rewrote 
_require_ready's body into a self-call -> RecursionError. Restore the real readiness check. test_server green again. Signed-off-by: Polina Binder --- .../sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py index 87973340d4..cefe5fdc1e 100644 --- a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py +++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/server.py @@ -86,7 +86,8 @@ async def lifespan(app: FastAPI): app.add_middleware(CORSMiddleware, allow_origins=allowed_origins, allow_methods=["*"], allow_headers=["*"]) def _require_ready(): - _require_ready() + if not engine.ready: + raise HTTPException(503, "Backend not ready") @app.get("/health") def health(): From 6e1b7d7e707401d6d850913750049ee9aa21338c Mon Sep 17 00:00:00 2001 From: Polina Binder Date: Fri, 12 Jun 2026 05:37:55 +0000 Subject: [PATCH 4/4] style(core): ruff-format blank line after in-function import (fix pre-commit CI) Co-Authored-By: Claude Opus 4.8 Signed-off-by: Polina Binder --- .../sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py index 00035d9b7f..6cc72cc80b 100644 --- a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py +++ b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/core.py @@ -353,6 +353,7 @@ def generate( comp = self._ensure_engine() hook_layer = unwrap_model(comp.model).decoder.layers[self.layer] from sae.steering import clamp_hook + feat_meta = [{"id": fid, "label": 
self.labels.get(fid), "strength": s} for fid, s in clamps.items()] def _run(steer: bool) -> str: