NVIDIA-BioNeMo · polinabinder1 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
@@ -13,13 +13,15 @@ dependencies = [
     "torch>=2.0",
     "numpy>=1.20",
     "pyarrow>=23.0.0",
+    "fastapi>=0.110",
+    "uvicorn>=0.29",
+    "pandas>=1.5",
 ]
 
-# No package code lives here yet — the recipe is just an entry-point for
-# scripts/ that depends on the shared `sae` workspace package. Declare no
-# packages so setuptools doesn't try to discover anything.
-[tool.setuptools]
-packages = []
+# The `evo2_sae` package (src/) holds the live inference engine + server + CLI;
+# scripts/ (extract, train) are standalone entry points alongside it.
+[tool.setuptools.packages.find]
+where = ["src"]
 
 [tool.uv.sources]
 sae = { workspace = true }
diff --git a/...emo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh b/...emo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/launch_inference.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Launch the Evo2 SAE inference engine. One engine, three modes:
+#
+#   ./launch_inference.sh serve                       # live HTTP server on :8001 (viz backend)
+#   ./launch_inference.sh encode  --sequence ATGC...  # annotate ONE sequence -> top features
+#   ./launch_inference.sh batch   --fasta in.fa --out out.parquet   # MANY sequences -> parquet
+#
+# Config via env. VENV, EVO2_CKPT_DIR, SAE_CKPT_PATH, FEATURE_ANNOTATIONS and
+# EMBEDDING_LAYER get the defaults assigned below; DEVICE, PORT and
+# CUDA_VISIBLE_DEVICES are not set by this script and are inherited from the
+# caller's environment unchanged.
+#
+# Requires the evo2_megatron recipe venv (provides bionemo.evo2 + megatron).
+set -euo pipefail
+
+# Resolve paths from the script's own location so it can be run from any cwd.
+HERE="$(cd "$(dirname "$0")" && pwd)"
+RECIPE_DIR="$(cd "$HERE/.." && pwd)"  # recipes/evo2 — so the evo2_sae package imports
+
+# NOTE(review): the VENV default is an absolute path on one machine; set VENV
+# explicitly anywhere else.
+VENV="${VENV:-/data/pbinder/bionemo-framework/bionemo-recipes/recipes/evo2_megatron/.venv}"
+# Exported so the Python CLI picks them up from the environment. evo2_sae.cli
+# falls back to different values (1b checkpoint, layer 19) when these variables
+# are unset, so the 7B / layer-26 values here are what a launch through this
+# script uses by default.
+export EVO2_CKPT_DIR="${EVO2_CKPT_DIR:-/data/interp/evo2/checkpoints/evo2_7b_mbridge}"
+export SAE_CKPT_PATH="${SAE_CKPT_PATH:-/data/interp/evo2/sae/v2_diverse/layer26_7B_ablate_normalize_input/checkpoints/checkpoint_final.pt}"
+export FEATURE_ANNOTATIONS="${FEATURE_ANNOTATIONS:-/data/interp/evo2/sae_eval/dashboard_data/l26_7B_normalize/feature_metadata.parquet}"
+export EMBEDDING_LAYER="${EMBEDDING_LAYER:-26}"
+
+# Fail early with a readable message when the venv interpreter is missing.
+if [[ ! -x "$VENV/bin/python" ]]; then
+  echo "ERROR: evo2_megatron venv not found at $VENV (build it with the recipe's .ci_build.sh)" >&2
+  exit 1
+fi
+
+source "$VENV/bin/activate"
+cd "$RECIPE_DIR"
+# Prepend src/ so `evo2_sae` is importable; the `:+` form appends the previous
+# PYTHONPATH only when it is set and non-empty, which is also safe under `set -u`.
+export PYTHONPATH="$RECIPE_DIR/src${PYTHONPATH:+:$PYTHONPATH}"
+# exec replaces this shell with the Python process; all script arguments are
+# forwarded to the CLI untouched.
+exec python -m evo2_sae.cli "$@"
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-Apache2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Evo2 + SAE inference engine — reused by the live server, the batch CLI, and the viz backend."""
+
+from .core import DEFAULT_ORGANISM_TAGS, Evo2SAE, clean_dna
+
+
+__all__ = ["DEFAULT_ORGANISM_TAGS", "Evo2SAE", "clean_dna"]
diff --git a/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py b/bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/src/evo2_sae/cli.py
@@ -0,0 +1,155 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-Apache2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Evo2 SAE inference CLI — one engine, three modes.
+
+    serve   : start the FastAPI server (one sequence at a time, interactive)
+    encode  : annotate ONE sequence -> top features (stdout JSON)
+    batch   : run a FASTA of MANY sequences -> parquet of per-sequence top features
+
+All three build the same `Evo2SAE` engine; config comes from flags or env
+(EVO2_CKPT_DIR / SAE_CKPT_PATH / FEATURE_ANNOTATIONS / EMBEDDING_LAYER).
+"""
+
+from __future__ import annotations
+
+import argparse
+import gzip
+import json
+import os
+
+
+def _add_common(p: argparse.ArgumentParser) -> None:
+    """Register the engine options shared by the serve / encode / batch subcommands.
+
+    Each option falls back to an environment variable and then to a built-in
+    default. For the integer options the raw environment string is handed to
+    argparse as the default, so ``type=int`` does the conversion at parse time:
+    a malformed ``EMBEDDING_LAYER`` / ``MAX_SEQ_LEN`` becomes an ordinary
+    argparse usage error (and is ignored when the flag is given explicitly)
+    instead of a ``ValueError`` traceback raised while the parser is built.
+
+    Args:
+        p: Parser (or subparser) that receives the options; modified in place.
+    """
+    env = os.environ
+    p.add_argument(
+        "--evo2-ckpt-dir",
+        default=env.get("EVO2_CKPT_DIR", "/data/interp/evo2/checkpoints/evo2_1b_base_mbridge"),
+        help="Evo2 checkpoint directory (env: EVO2_CKPT_DIR)",
+    )
+    p.add_argument(
+        "--sae-ckpt-path",
+        default=env.get(
+            "SAE_CKPT_PATH", "/data/interp/evo2/sae/v2_diverse/layer19_C13_nofilter/checkpoints/checkpoint_final.pt"
+        ),
+        help="SAE checkpoint file (env: SAE_CKPT_PATH)",
+    )
+    p.add_argument(
+        "--feature-annotations",
+        default=env.get(
+            "FEATURE_ANNOTATIONS",
+            "/data/interp/evo2/sae_eval/dashboard_data/l19_C13_nofilter/feature_metadata.parquet",
+        ),
+        help="feature metadata parquet (env: FEATURE_ANNOTATIONS)",
+    )
+    # String defaults on purpose: argparse applies ``type`` to a string default
+    # only when the option is absent from the command line.
+    p.add_argument(
+        "--layer",
+        type=int,
+        default=env.get("EMBEDDING_LAYER", "19"),
+        help="embedding layer index (env: EMBEDDING_LAYER)",
+    )
+    p.add_argument("--device", default=env.get("DEVICE", "cuda"), help="device string (env: DEVICE)")
+    p.add_argument(
+        "--max-seq-len",
+        type=int,
+        default=env.get("MAX_SEQ_LEN", "8192"),
+        help="maximum sequence length (env: MAX_SEQ_LEN)",
+    )
+
+
+def _engine(args):
+    """Construct an `Evo2SAE` from the common options on the parsed namespace.
+
+    `.core` is imported inside the function rather than at module import time.
+    The engine is returned exactly as constructed; nothing else is called on it
+    here.
+    """
+    from .core import Evo2SAE
+
+    engine_kwargs = {
+        "evo2_ckpt_dir": args.evo2_ckpt_dir,
+        "sae_ckpt_path": args.sae_ckpt_path,
+        "layer": args.layer,
+        "device": args.device,
+        "max_seq_len": args.max_seq_len,
+        "feature_annotations": args.feature_annotations,
+    }
+    return Evo2SAE(**engine_kwargs)
+
+
+def _read_fasta(path: str):
+    """Parse a FASTA file (plain or ``.gz``) into parallel id / sequence lists.
+
+    A record id is the first whitespace-delimited token after ``>``; a header
+    with nothing after ``>`` gets ``seq_<index>``, where the index is the number
+    of records seen before it. Sequence lines are right-stripped and
+    concatenated. Lines that appear before the first header are discarded.
+
+    Returns:
+        ``(ids, seqs)`` — two lists of equal length, in file order.
+    """
+    records = []  # (record id, list of sequence chunks), in file order
+    reader = gzip.open if str(path).endswith(".gz") else open
+    with reader(path, "rt") as handle:
+        for raw in handle:
+            text = raw.rstrip()
+            if not text.startswith(">"):
+                # Sequence data only counts once a header has opened a record.
+                if records:
+                    records[-1][1].append(text)
+                continue
+            header = text[1:]
+            record_id = header.split()[0] if header else f"seq_{len(records)}"
+            records.append((record_id, []))
+    ids = [record_id for record_id, _ in records]
+    seqs = ["".join(chunks) for _, chunks in records]
+    return ids, seqs
+
+
+def main():
+    """Entry point: parse the command line and run the selected subcommand.
+
+    ``serve`` passes an engine to ``build_app`` and runs the result under
+    uvicorn. ``encode`` prints one sequence's top features as JSON on stdout.
+    ``batch`` reads a FASTA file and writes one parquet row per
+    (sequence, ranked feature) pair.
+    """
+    parser = argparse.ArgumentParser(description="Evo2 SAE inference (serve | encode | batch)")
+    commands = parser.add_subparsers(dest="cmd", required=True)
+
+    serve_p = commands.add_parser("serve", help="start the FastAPI inference server")
+    _add_common(serve_p)
+    serve_p.add_argument("--host", default="0.0.0.0")
+    serve_p.add_argument("--port", type=int, default=int(os.environ.get("PORT", "8001")))
+
+    encode_p = commands.add_parser("encode", help="annotate ONE sequence -> top features (JSON)")
+    _add_common(encode_p)
+    encode_p.add_argument("--sequence", required=True)
+    encode_p.add_argument("--organism", default="None (raw DNA)")
+    encode_p.add_argument("--top-k", type=int, default=8)
+
+    batch_p = commands.add_parser("batch", help="MANY sequences (FASTA) -> parquet of per-sequence top features")
+    _add_common(batch_p)
+    batch_p.add_argument("--fasta", required=True)
+    batch_p.add_argument("--out", required=True)
+    batch_p.add_argument("--top-k", type=int, default=16)
+    batch_p.add_argument("--batch-size", type=int, default=8)
+
+    args = parser.parse_args()
+
+    if args.cmd == "serve":
+        import uvicorn
+
+        from .server import build_app
+
+        # The engine is handed to the app without calling .load() here.
+        app = build_app(_engine(args))
+        uvicorn.run(app, host=args.host, port=args.port, log_level="info")
+        return
+
+    from .core import clean_dna
+
+    # encode and batch both work on an engine that has been .load()-ed.
+    engine = _engine(args).load()
+
+    if args.cmd == "encode":
+        organism_tag = engine.resolve_tag(args.organism, None) or ""
+        cleaned = clean_dna(args.sequence)
+        codes = engine.encode(organism_tag + cleaned)
+        # Pass a tag length of 0 when the codes have fewer rows than the tag
+        # has characters.
+        n_tag = len(organism_tag)
+        if codes.shape[0] < n_tag:
+            n_tag = 0
+        features = engine.top_features(codes, tag_len=n_tag, k=args.top_k)
+        payload = {"sequence": cleaned, "organism": args.organism, "bases": len(cleaned), "top_features": features}
+        print(json.dumps(payload, indent=2))
+        return
+
+    if args.cmd == "batch":
+        import pandas as pd
+
+        ids, seqs = _read_fasta(args.fasta)
+        print(f"[batch] {len(seqs)} sequences from {args.fasta}; encoding (batch_size={args.batch_size})…")
+        codes_list = engine.encode_batch(seqs, batch_size=args.batch_size)
+        # One row per (sequence, ranked feature); each feature dict's own keys
+        # are merged into the row.
+        rows = [
+            {"sequence_id": seq_id, "bp": int(seq_codes.shape[0]), "rank": rank, **feature}
+            for seq_id, seq_codes in zip(ids, codes_list)
+            for rank, feature in enumerate(engine.top_features(seq_codes, k=args.top_k))
+        ]
+        df = pd.DataFrame(rows)
+        df.to_parquet(args.out, index=False)
+        print(f"[batch] wrote {len(df)} rows for {len(seqs)} sequences -> {args.out}")
+
+
+# Run the CLI when the module is executed directly (e.g. `python -m evo2_sae.cli`).
+if __name__ == "__main__":
+    main()