NVIDIA-BioNeMo · polinabinder1 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Launch the Evo2 SAE inference engine. One engine, four modes:
+#
+#   ./launch_inference.sh serve                       # live HTTP server on :8001 (viz backend)
+#   ./launch_inference.sh encode  --sequence ATGC...  # annotate ONE sequence -> top features
+#   ./launch_inference.sh batch   --fasta in.fa --out out.parquet   # MANY sequences -> parquet
+#   ./launch_inference.sh generate --prompt ATGC... --clamp 29244:300  # steer + generate DNA
+#
+# Steering loop: `encode` a sequence to find an active feature id, then
+# `generate --clamp ID:STRENGTH` (strength ~2-3x the feature's max_activation; repeat --clamp).
+#
+# Config via env:
+#   Required (the script aborts if unset or empty): VENV, EVO2_CKPT_DIR, SAE_CKPT_PATH.
+#   Optional, defaulted and exported here: FEATURE_ANNOTATIONS (empty), EMBEDDING_LAYER (26).
+#   Optional, not touched here: DEVICE, PORT, CUDA_VISIBLE_DEVICES — the Python process
+#   simply inherits them when they are present in the caller's environment.
+#
+# All command-line arguments are forwarded unchanged to `python -m evo2_sae.cli`.
+set -euo pipefail
+
+# Resolve this script's directory. BASH_SOURCE (rather than $0) stays correct even if the
+# file is sourced, and `--` keeps a path beginning with '-' from being read as an option.
+HERE="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+RECIPE_DIR="$(cd -- "$HERE/.." && pwd)"  # recipes/evo2 — so the evo2_sae package imports
+
+# Required (no hardcoded defaults — supply your own paths via env):
+VENV="${VENV:?Set VENV to the evo2_megatron recipe .venv (provides bionemo.evo2 + megatron)}"
+export EVO2_CKPT_DIR="${EVO2_CKPT_DIR:?Set EVO2_CKPT_DIR to an Evo2 MBridge checkpoint directory}"
+export SAE_CKPT_PATH="${SAE_CKPT_PATH:?Set SAE_CKPT_PATH to a trained SAE checkpoint (.pt)}"
+# Optional: feature-label parquet (empty = features are unlabeled). Layer defaults to 26.
+export FEATURE_ANNOTATIONS="${FEATURE_ANNOTATIONS:-}"
+export EMBEDDING_LAYER="${EMBEDDING_LAYER:-26}"
+
+# Fail early with a readable message instead of a cryptic error from `source` below.
+if [[ ! -x "$VENV/bin/python" ]]; then
+  echo "ERROR: evo2_megatron venv not found at $VENV (build it with the recipe's .ci_build.sh)" >&2
+  exit 1
+fi
+
+# shellcheck source=/dev/null
+source "$VENV/bin/activate"
+cd -- "$RECIPE_DIR"
+# Prepend the recipe's src/ so `evo2_sae` is importable; keep any PYTHONPATH already set.
+export PYTHONPATH="$RECIPE_DIR/src${PYTHONPATH:+:$PYTHONPATH}"
+# exec replaces this shell, so the CLI's exit status becomes the script's exit status.
+exec python -m evo2_sae.cli "$@"
@@ -0,0 +1,197 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-Apache2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Evo2 SAE inference CLI — one engine, four modes.
+
+    serve   : start the FastAPI server (one sequence at a time, interactive)
+    encode  : annotate ONE sequence -> top features (stdout JSON)
+    batch   : run a FASTA of MANY sequences -> parquet of per-sequence top features
+    generate: generate DNA, optionally steering SAE features (stdout JSON)
+
+They all build the same `Evo2SAE` engine; config comes from flags or env
+(EVO2_CKPT_DIR / SAE_CKPT_PATH / FEATURE_ANNOTATIONS / EMBEDDING_LAYER).
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+
+
+def _add_common(p: argparse.ArgumentParser) -> None:
+    """Register the shared inference arguments (checkpoints, layer, device) on a parser.
+
+    Defaults come from env vars (``EVO2_CKPT_DIR``, ``SAE_CKPT_PATH``, ``FEATURE_ANNOTATIONS``,
+    ``EMBEDDING_LAYER``, ``DEVICE``, ``MAX_SEQ_LEN``); pass the flags to override. No hardcoded
+    paths — the checkpoints must be supplied via flag or env.
+
+    Args:
+        p: The argparse parser (or subparser) to add the shared arguments to.
+
+    Returns:
+        None. Mutates ``p`` in place.
+    """
+    p.add_argument("--evo2-ckpt-dir", default=os.environ.get("EVO2_CKPT_DIR"))
+    p.add_argument("--sae-ckpt-path", default=os.environ.get("SAE_CKPT_PATH"))
+    p.add_argument("--feature-annotations", default=os.environ.get("FEATURE_ANNOTATIONS"))
+    p.add_argument("--layer", type=int, default=int(os.environ.get("EMBEDDING_LAYER", "26")))
+    p.add_argument("--device", default=os.environ.get("DEVICE", "cuda"))
+    p.add_argument("--max-seq-len", type=int, default=int(os.environ.get("MAX_SEQ_LEN", "8192")))
+
+
+def _engine(args):
+    """Construct an Evo2SAE engine from parsed CLI args.
+
+    Args:
+        args: Parsed argparse namespace with ``evo2_ckpt_dir``, ``sae_ckpt_path``, ``layer``,
+            ``device``, ``max_seq_len``, ``feature_annotations``.
+
+    Returns:
+        An (unloaded) ``Evo2SAE`` instance — call ``.load()`` before use.
+    """
+    from .core import Evo2SAE
+
+    return Evo2SAE(
+        evo2_ckpt_dir=args.evo2_ckpt_dir,
+        sae_ckpt_path=args.sae_ckpt_path,
+        layer=args.layer,
+        device=args.device,
+        max_seq_len=args.max_seq_len,
+        feature_annotations=args.feature_annotations,
+    )
+
+
+def _parse_clamps(clamps: list[str]) -> list[dict]:
+    """Parse repeated ``--clamp FEATURE_ID[:STRENGTH]`` args into [{feature_id, strength}].
+
+    Strength defaults to 1.0 if omitted (e.g. ``--clamp 29244:300`` or ``--clamp 29244``).
+    """
+    specs = []
+    for c in clamps:
+        fid, sep, strength = c.partition(":")
+        specs.append({"feature_id": int(fid), "strength": float(strength) if (sep and strength) else 1.0})
+    return specs
+
+
+def main():
+    """Parse args and dispatch to the serve / encode / batch subcommand."""
+    ap = argparse.ArgumentParser(description="Evo2 SAE inference (serve | encode | batch | generate)")
+    sub = ap.add_subparsers(dest="cmd", required=True)
+
+    ps = sub.add_parser("serve", help="start the FastAPI inference server")
+    _add_common(ps)
+    ps.add_argument("--host", default="0.0.0.0")
+    ps.add_argument("--port", type=int, default=int(os.environ.get("PORT", "8001")))
+
+    pe = sub.add_parser("encode", help="annotate ONE sequence -> top features (JSON)")
+    _add_common(pe)
+    pe.add_argument("--sequence", required=True)
+    pe.add_argument("--organism", default="None (raw DNA)")
+    pe.add_argument("--top-k", type=int, default=8)
+
+    pb = sub.add_parser("batch", help="MANY sequences (FASTA) -> parquet of per-sequence top features")
+    _add_common(pb)
+    pb.add_argument("--fasta", required=True)
+    pb.add_argument("--out", required=True)
+    pb.add_argument("--top-k", type=int, default=16)
+    pb.add_argument("--batch-size", type=int, default=8)
+
+    pg = sub.add_parser("generate", help="generate DNA, optionally steering SAE features")
+    _add_common(pg)
+    pg.add_argument("--prompt", default="", help="DNA to seed; steering applies to the continuation")
+    pg.add_argument("--organism", default="None (raw DNA)")
+    pg.add_argument(
+        "--clamp",
+        action="append",
+        default=[],
+        metavar="FEATURE_ID[:STRENGTH]",
+        help="clamp a feature on the continuation; repeatable (e.g. --clamp 29244:300). "
+        "Find feature ids with `encode`.",
+    )
+    pg.add_argument("--n-tokens", type=int, default=120)
+    pg.add_argument("--temperature", type=float, default=1.0)
+    pg.add_argument("--top-k", type=int, default=0)
+    pg.add_argument("--compare-baseline", action="store_true", help="also generate unsteered, for comparison")
+
+    args = ap.parse_args()
+
+    if args.cmd == "serve":
+        import uvicorn
+
+        from .server import build_app
+
+        uvicorn.run(build_app(_engine(args)), host=args.host, port=args.port, log_level="info")
+        return
+
+    from .core import clean_dna
+
+    eng = _engine(args).load()
+
+    if args.cmd == "encode":
+        tag = eng.resolve_tag(args.organism, None) or ""
+        dna = clean_dna(args.sequence)
+        codes = eng.encode(tag + dna)
+        tag_len = len(tag) if codes.shape[0] >= len(tag) else 0
+        feats = eng.top_features(codes, tag_len=tag_len, k=args.top_k)
+        print(
+            json.dumps(
+                {"sequence": dna, "organism": args.organism, "bases": len(dna), "top_features": feats}, indent=2
+            )
+        )
+
+    elif args.cmd == "batch":
+        import pandas as pd
+
+        from .fasta import read_fasta
+
+        ids, seqs = [], []
+        for sid, seq in read_fasta(args.fasta):
+            ids.append(sid)
+            seqs.append(seq)
+        print(f"[batch] {len(seqs)} sequences from {args.fasta}; encoding (batch_size={args.batch_size})…")
+        codes_list = eng.encode_batch(seqs, batch_size=args.batch_size)
+        rows = []
+        for sid, codes in zip(ids, codes_list):
+            for rank, ft in enumerate(eng.top_features(codes, k=args.top_k)):
+                rows.append({"sequence_id": sid, "bp": int(codes.shape[0]), "rank": rank, **ft})
+        df = pd.DataFrame(rows)
+        df.to_parquet(args.out, index=False)
+        print(f"[batch] wrote {len(df)} rows for {len(seqs)} sequences -> {args.out}")
+
+    elif args.cmd == "generate":
+        out = eng.generate(
+            prompt=args.prompt,
+            organism=args.organism,
+            features=_parse_clamps(args.clamp),
+            n_tokens=args.n_tokens,
+            temperature=args.temperature,
+            top_k=args.top_k,
+            compare_baseline=args.compare_baseline,
+        )
+        result = {
+            "prompt": out["prompt"],
+            "organism": out["organism"],
+            "steered": out["steered"],
+            "features": out["features"],
+            "sequence": out["generation"]["sequence"],
+        }
+        if out.get("baseline"):
+            result["baseline_sequence"] = out["baseline"]["sequence"]
+        print(json.dumps(result, indent=2))
+
+
+# Run the CLI only when this module is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
@@ -353,6 +353,7 @@ def generate(
             comp = self._ensure_engine()
             hook_layer = unwrap_model(comp.model).decoder.layers[self.layer]
             from sae.steering import clamp_hook
+
             feat_meta = [{"id": fid, "label": self.labels.get(fid), "strength": s} for fid, s in clamps.items()]
 
             def _run(steer: bool) -> str: