diff --git a/prmi/src/cli.rs b/prmi/src/cli.rs
index f396d01..7acecca 100644
--- a/prmi/src/cli.rs
+++ b/prmi/src/cli.rs
@@ -174,6 +174,43 @@ pub enum Cmd {
         #[arg(long, default_value_t = 1)]
         threads: usize,
     },
+    /// Build a Design-Z keep-set BED from a reference + per-contig target BED.
+    ///
+    /// Emits a FLAT-coordinate keep-set (targets kept whole + the start
+    /// positions of forward/reverse-complement k-mer homologs of target k-mers
+    /// under the count cap) suitable for `prmi build --keep-bed`. The output is
+    /// in prmi's concatenated-genome coordinate space, NOT per-contig.
+    Keepset {
+        /// Reference FASTA (same contig order `prmi build` will use).
+        #[arg(long, value_name = "PATH")]
+        r#ref: PathBuf,
+        /// Per-contig BED3 of target regions (chrom must match FASTA names).
+        #[arg(long, value_name = "PATH")]
+        targets: PathBuf,
+        /// Output keep-set BED path (`-` for stdout).
+        #[arg(short = 'o', long, value_name = "PATH", default_value = "-")]
+        out: PathBuf,
+        /// k-mer length for homology detection (1..=32). Default provisional
+        /// pending the item-3 benchmark.
+        #[arg(short = 'k', long, default_value_t = 32)]
+        k: usize,
+        /// Drop target k-mers whose total reference count exceeds this cap;
+        /// `0` = uncapped (keep every homolog).
+        #[arg(long, default_value_t = 0)]
+        max_occ: u64,
+        /// Extend each kept run by this many bp on both sides before merging.
+        #[arg(long, default_value_t = 0)]
+        flank: u64,
+        /// Emit compact BED12 (blocks) instead of one BED3 line per run.
+        #[arg(long, default_value_t = false)]
+        bed12: bool,
+        /// BED12 only: maximum blocks packed per line.
+        #[arg(long, default_value_t = 1000)]
+        blocks_per_line: usize,
+        /// Cosmetic chrom label for the flat-coordinate output BED.
+        #[arg(long, default_value = "genome")]
+        chrom_label: String,
+    },
 }
 
 /// Subcommands for `prmi shm`.
@@ -366,6 +403,86 @@ impl Cli {
                 println!("OK: {n} SA entries certified against the lexicographic oracle");
                 Ok(())
             }
+            Cmd::Keepset {
+                r#ref,
+                targets,
+                out,
+                k,
+                max_occ,
+                flank,
+                bed12,
+                blocks_per_line,
+                chrom_label,
+            } => {
+                // Validate up front (before reading the reference, which may be
+                // whole-genome) so an invalid knob fails fast with a clear error.
+                // These are user-input violations, so preserve the crate's
+                // `InvalidInput` categorization (matching the --with-isa guard
+                // above) rather than a generic anyhow error.
+                if !(1..=32).contains(&k) {
+                    return Err(crate::Error::InvalidInput {
+                        detail: format!("--k must be in 1..=32, got {k}"),
+                    }
+                    .into());
+                }
+                if blocks_per_line < 1 {
+                    return Err(crate::Error::InvalidInput {
+                        detail: format!("--blocks-per-line must be >= 1, got {blocks_per_line}"),
+                    }
+                    .into());
+                }
+                let opts = crate::keepset::KeepsetOptions {
+                    k,
+                    max_occ,
+                    flank,
+                    format: if bed12 {
+                        crate::keepset::KeepsetFormat::Bed12
+                    } else {
+                        crate::keepset::KeepsetFormat::Bed3
+                    },
+                    blocks_per_line,
+                    chrom_label,
+                };
+                // Stream to stdout for "-", else to the named file.
+                let ctx = || {
+                    format!(
+                        "building keep-set from {} + {}",
+                        r#ref.display(),
+                        targets.display()
+                    )
+                };
+                // Flush the BufWriter explicitly in each branch: `drop` swallows
+                // flush errors, so a disk-full / broken-pipe on the final flush
+                // would otherwise print success and return Ok.
+                let stats = if out == std::path::Path::new("-") {
+                    let stdout = std::io::stdout();
+                    let mut w = std::io::BufWriter::new(stdout.lock());
+                    let stats = crate::keepset::build_keepset(&r#ref, &targets, &opts, &mut w)
+                        .with_context(ctx)?;
+                    std::io::Write::flush(&mut w).context("flushing keep-set output to stdout")?;
+                    stats
+                } else {
+                    let f = std::fs::File::create(&out)
+                        .with_context(|| format!("creating {}", out.display()))?;
+                    let mut w = std::io::BufWriter::new(f);
+                    let stats = crate::keepset::build_keepset(&r#ref, &targets, &opts, &mut w)
+                        .with_context(ctx)?;
+                    std::io::Write::flush(&mut w).with_context(|| {
+                        format!("flushing keep-set output to {}", out.display())
+                    })?;
+                    stats
+                };
+                eprintln!(
+                    "keepset: genome={} bp, {} target intervals -> {} kept runs ({} bp, {:.3}%), {} BED lines",
+                    stats.genome_len,
+                    stats.target_intervals,
+                    stats.kept_runs,
+                    stats.kept_bp,
+                    100.0 * stats.kept_bp as f64 / stats.genome_len.max(1) as f64,
+                    stats.lines_written,
+                );
+                Ok(())
+            }
             Cmd::Shm { cmd } => match cmd {
                 ShmCmd::Load {
                     sidecar_prefix,
diff --git a/prmi/src/keepset.rs b/prmi/src/keepset.rs
new file mode 100644
index 0000000..50c9397
--- /dev/null
+++ b/prmi/src/keepset.rs
@@ -0,0 +1,778 @@
+// Copyright (C) 2026 Fulcrum Genomics LLC
+// SPDX-License-Identifier: MIT
+
+//! Keep-set construction (Design Z, item 2): turn a per-contig target BED plus a
+//! reference into a flat-coordinate keep-set BED consumable by
+//! `prmi build --keep-bed`.
+//!
+//! The keep-set is the union of:
+//! - **Targets** — every position of each target interval (kept whole).
+//! - **k-mer homology** — the START position of every reference k-mer that is a
+//!   forward or reverse-complement copy of a k-mer occurring at a target
+//!   position, provided that k-mer's reference count is `<= max_occ` (high-copy
+//!   repeats are dropped — they fall back to the off-target engine regardless).
+//!   Only the suffix-START position of each homologous copy is kept, because the
+//!   tiered `.sa` retains an entry iff its suffix-start coordinate is in the
+//!   keep-set; keeping the whole `[p, p+k)` span would over-keep on merge.
+//!
+//! Coordinate space: the output BED is in prmi's FLAT concatenated-genome
+//! coordinate space (contigs concatenated in FASTA order, one position per base,
+//! N included) — the same space `prmi build` operates in and `parse_bed`
+//! consumes. The chrom column is a cosmetic label; positions are flat offsets.
+//!
+//! Output format: [`KeepsetFormat::Bed3`] emits one line per maximal kept run
+//! (precise, but potentially many lines); [`KeepsetFormat::Bed12`] packs runs
+//! into block-bearing lines (compact). Both decode to the identical kept
+//! position set via `parse_bed`.
+//!
+//! Limitation: homology is detected with a single fixed `k` over exact matches
+//! (forward + reverse-complement). It does not capture gapped/SNP-divergent
+//! paralogy; that is by design — the off-target fallback serves those reads.
+
+use crate::encoding::base_to_2bit;
+use crate::error::{Error, Result};
+use noodles_fasta::io::Reader;
+use std::collections::{HashMap, HashSet};
+use std::io::{BufReader, Write};
+use std::path::Path;
+
+/// A non-N base reads as 0..=3; this sentinel marks N / non-IUPAC, which resets
+/// the rolling k-mer (no k-mer window may span an N).
+const N_SENTINEL: u8 = 4;
+
+/// Output BED flavor for the keep-set.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum KeepsetFormat {
+    /// One BED3 line per maximal kept run.
+    Bed3,
+    /// BED12 lines packing up to `blocks_per_line` runs as blocks each.
+    Bed12,
+}
+
+/// Options controlling keep-set construction.
+#[derive(Debug, Clone)]
+pub struct KeepsetOptions {
+    /// k-mer length used to detect homology (1..=32). Default provisional
+    /// pending the item-3 benchmark.
+    pub k: usize,
+    /// Drop target k-mers whose total reference count (forward + reverse
+    /// complement, counted on the strand-canonical key) exceeds this cap —
+    /// high-copy repeats, including inverted ones, fall back regardless. `0`
+    /// means uncapped.
+    pub max_occ: u64,
+    /// Extend every kept run by this many bp on each side before merging. `0`
+    /// keeps the set position-precise.
+    pub flank: u64,
+    /// Output BED flavor.
+    pub format: KeepsetFormat,
+    /// BED12 only: maximum blocks packed into a single line.
+    pub blocks_per_line: usize,
+    /// Cosmetic chrom label written to the (flat-coordinate) output BED.
+    pub chrom_label: String,
+}
+
+impl Default for KeepsetOptions {
+    fn default() -> Self {
+        Self {
+            k: 32,
+            max_occ: 0,
+            flank: 0,
+            format: KeepsetFormat::Bed3,
+            blocks_per_line: 1000,
+            chrom_label: "genome".to_string(),
+        }
+    }
+}
+
+/// Summary statistics returned by [`build_keepset`].
+#[derive(Debug, Clone, PartialEq, Eq, Default)]
+pub struct KeepsetStats {
+    /// Flat genome length (sum of contig lengths).
+    pub genome_len: u64,
+    /// Number of target intervals mapped to flat coordinates.
+    pub target_intervals: usize,
+    /// Number of maximal kept runs emitted (before BED12 packing).
+    pub kept_runs: usize,
+    /// Total kept base pairs across all runs.
+    pub kept_bp: u64,
+    /// Number of BED lines written.
+    pub lines_written: usize,
+}
+
+/// One contig's position in the flat concatenated genome.
+#[derive(Debug, Clone)]
+struct Contig {
+    name: String,
+    offset: u64,
+    len: u64,
+}
+
+/// Read a FASTA into the flat 2-bit genome (ACGT→0..3, N→[`N_SENTINEL`]) and the
+/// contig table. Contigs are concatenated in FASTA record order, one position
+/// per base, matching `prmi build`'s flat coordinate space (which encodes N→A
+/// but still occupies one position, so flat offsets are identical).
+fn read_fasta_contigs(path: &Path) -> Result<(Vec<u8>, Vec<Contig>)> {
+    let file = std::fs::File::open(path).map_err(|source| Error::Io {
+        path: path.to_path_buf(),
+        source,
+    })?;
+    let mut reader = Reader::new(BufReader::new(file));
+    let mut bases: Vec<u8> = Vec::new();
+    let mut contigs: Vec<Contig> = Vec::new();
+    let mut seen_names: HashSet<String> = HashSet::new();
+    for record in reader.records() {
+        let record = record.map_err(|e| Error::Fasta {
+            file: path.to_path_buf(),
+            detail: e.to_string(),
+        })?;
+        let name = String::from_utf8_lossy(record.name()).into_owned();
+        // `parse_target_bed_to_flat` collapses contigs into a name->offset map;
+        // a duplicate name would silently overwrite the earlier offset and map
+        // its targets to the wrong contig, so reject duplicates up front.
+        if !seen_names.insert(name.clone()) {
+            return Err(Error::InvalidInput {
+                detail: format!("reference FASTA contains duplicate contig name {name:?}"),
+            });
+        }
+        let offset = bases.len() as u64;
+        let mut len = 0u64;
+        for &b in record.sequence().as_ref() {
+            bases.push(base_to_2bit(b).unwrap_or(N_SENTINEL));
+            len += 1;
+        }
+        contigs.push(Contig { name, offset, len });
+    }
+    Ok((bases, contigs))
+}
+
+/// Parse a per-contig BED3 target file and map each interval into flat
+/// coordinates using the contig table. The chrom column must name a contig in
+/// the reference; `end` must not exceed the contig length.
+fn parse_target_bed_to_flat(path: &Path, contigs: &[Contig]) -> Result<Vec<(u64, u64)>> {
+    let by_name: HashMap<&str, &Contig> = contigs.iter().map(|c| (c.name.as_str(), c)).collect();
+    let text = std::fs::read_to_string(path).map_err(|source| Error::Io {
+        path: path.to_path_buf(),
+        source,
+    })?;
+    let mut out: Vec<(u64, u64)> = Vec::new();
+    for (lineno, line) in text.lines().enumerate() {
+        let line = line.trim();
+        if line.is_empty()
+            || line.starts_with('#')
+            || line.starts_with("track")
+            || line.starts_with("browser")
+        {
+            continue;
+        }
+        // Require exact BED3. A wider record (e.g. BED12 with exon blocks) would
+        // have its block structure silently dropped and be widened to the outer
+        // [start, end) span — unlike the keep-set OUTPUT, which `parse_bed`
+        // decodes block-precise. Reject it so the two paths cannot disagree.
+        let cols: Vec<&str> = line.split_whitespace().collect();
+        if cols.len() != 3 {
+            return Err(Error::InvalidInput {
+                detail: format!(
+                    "target BED line {}: expected BED3 with exactly 3 columns, got {}",
+                    lineno + 1,
+                    cols.len()
+                ),
+            });
+        }
+        let chrom = cols[0];
+        let parse = |s: &str, what: &str| -> Result<u64> {
+            s.parse::<u64>().map_err(|_| Error::InvalidInput {
+                detail: format!("target BED line {}: bad {} {:?}", lineno + 1, what, s),
+            })
+        };
+        let start = parse(cols[1], "start")?;
+        let end = parse(cols[2], "end")?;
+        if end <= start {
+            return Err(Error::InvalidInput {
+                detail: format!(
+                    "target BED line {}: end <= start ({} <= {})",
+                    lineno + 1,
+                    end,
+                    start
+                ),
+            });
+        }
+        let contig = by_name.get(chrom).ok_or_else(|| Error::InvalidInput {
+            detail: format!(
+                "target BED line {}: chrom {:?} not found in reference",
+                lineno + 1,
+                chrom
+            ),
+        })?;
+        if end > contig.len {
+            return Err(Error::InvalidInput {
+                detail: format!(
+                    "target BED line {}: end {} exceeds contig {:?} length {}",
+                    lineno + 1,
+                    end,
+                    chrom,
+                    contig.len
+                ),
+            });
+        }
+        out.push((contig.offset + start, contig.offset + end));
+    }
+    Ok(out)
+}
+
+/// Invoke `f(start, code)` for every full k-mer window in `bases`, where
+/// `start` is the window's first position and `code` is its 2-bit packed code.
+/// The rolling code resets at any [`N_SENTINEL`] base, so no window spans an N.
+/// `k` must be in 1..=32 (caller-validated).
+fn for_each_kmer(bases: &[u8], k: usize, mut f: impl FnMut(usize, u64)) {
+    let mask: u64 = if k == 32 {
+        u64::MAX
+    } else {
+        (1u64 << (2 * k)) - 1
+    };
+    let mut code: u64 = 0;
+    let mut valid: usize = 0;
+    for (i, &b) in bases.iter().enumerate() {
+        if b >= N_SENTINEL {
+            valid = 0;
+            code = 0;
+            continue;
+        }
+        code = ((code << 2) | b as u64) & mask;
+        valid += 1;
+        if valid >= k {
+            f(i + 1 - k, code);
+        }
+    }
+}
+
+/// Reverse-complement a `for_each_kmer` code: a k-mer packed LOW-aligned into the
+/// low `2*k` bits (base 0 in the high `2*(k-1)..2*k` slot). Complements each base
+/// (2-bit XOR `0b11`) and reverses base order, returning the RC in the same
+/// low-aligned encoding.
+///
+/// NOTE: this is *not* [`crate::encoding::reverse_complement_key`], which operates
+/// on HIGH-aligned, T-padded 32-mer keys; feeding a low-aligned `for_each_kmer`
+/// code to that function mis-aligns the bases for `k < 32` and silently drops RC
+/// homologs. Keep the two encodings from crossing.
+fn reverse_complement_low(mut code: u64, k: usize) -> u64 {
+    let mut rc = 0u64;
+    for _ in 0..k {
+        rc = (rc << 2) | ((!code) & 0b11);
+        code >>= 2;
+    }
+    rc
+}
+
+/// The strand-canonical key for a low-aligned k-mer code: the lexicographic min
+/// of the code and its reverse complement. A k-mer and its RC share one key, so
+/// forward/RC homologs compare equal and inverted repeats count toward `max_occ`.
+fn canonical_low_key(code: u64, k: usize) -> u64 {
+    code.min(reverse_complement_low(code, k))
+}
+
+/// Compute the merged, flat-coordinate kept runs for the keep-set.
+///
+/// `bases` is the flat 2-bit genome (values 0..=3, [`N_SENTINEL`] for N).
+/// `targets` are flat half-open target intervals (need not be sorted/merged).
+/// Returns sorted, merged `[start, end)` runs.
+///
+/// Returns [`Error::InvalidInput`] if `k` is outside `1..=32`, or if any target
+/// interval is empty/inverted (`end <= start`) or extends past `bases.len()`.
+///
+/// Pure (no I/O) so the homology logic can be unit-tested on synthetic input.
+pub fn compute_keep_runs(
+    bases: &[u8],
+    targets: &[(u64, u64)],
+    k: usize,
+    max_occ: u64,
+    flank: u64,
+) -> Result<Vec<(u64, u64)>> {
+    if !(1..=32).contains(&k) {
+        return Err(Error::InvalidInput {
+            detail: format!("keep-set k must be in 1..=32, got {k}"),
+        });
+    }
+    let n = bases.len();
+
+    // Target-position bitmap (whole intervals are kept). Validate caller input
+    // rather than silently clipping: an out-of-range or inverted interval is
+    // malformed (and `s.min(n) > e.min(n)` would panic the slice below), so
+    // reject it up front.
+    let mut in_target = vec![false; n];
+    for &(s, e) in targets {
+        if e <= s {
+            return Err(Error::InvalidInput {
+                detail: format!("keep-set target end <= start ({e} <= {s})"),
+            });
+        }
+        if e > n as u64 {
+            return Err(Error::InvalidInput {
+                detail: format!("keep-set target end {e} exceeds genome length {n}"),
+            });
+        }
+        for slot in &mut in_target[s as usize..e as usize] {
+            *slot = true;
+        }
+    }
+
+    // Pass 1: collect the strand-canonical keys of k-mers that START at a target
+    // position. Canonicalizing here (min of forward and reverse-complement code)
+    // means a target k-mer and its RC share one key, so inverted homologs match
+    // and count together later. We deliberately do NOT count all genome k-mers —
+    // on a whole genome that map would hold billions of entries (~140 GB on
+    // hg38). Only target k-mers can contribute to the keep-set, so only they are
+    // tracked.
+    let mut target_keys: HashSet<u64> = HashSet::new();
+    for_each_kmer(bases, k, |start, code| {
+        if in_target[start] {
+            target_keys.insert(canonical_low_key(code, k));
+        }
+    });
+
+    // Homology k-mer set: target k-mers under the count cap. When uncapped
+    // (max_occ == 0) every target k-mer qualifies and no counting is needed;
+    // otherwise count genome-wide occurrences of TARGET keys ONLY (the map is
+    // bounded by the number of distinct target k-mers, not the whole genome).
+    // Counting on the canonical key sums forward and reverse-complement
+    // occurrences, so inverted high-copy repeats cannot slip under the cap.
+    let halo: HashSet<u64> = if max_occ == 0 {
+        target_keys
+    } else {
+        let mut count: HashMap<u64, u64> = HashMap::with_capacity(target_keys.len());
+        for_each_kmer(bases, k, |_start, code| {
+            let key = canonical_low_key(code, k);
+            if target_keys.contains(&key) {
+                let c = count.entry(key).or_insert(0);
+                *c = c.saturating_add(1);
+            }
+        });
+        target_keys
+            .into_iter()
+            .filter(|key| *count.get(key).unwrap_or(&0) <= max_occ)
+            .collect()
+    };
+
+    // Keep bitmap: targets (whole) ∪ homology START positions (length-1). A
+    // position is a homologous anchor if its k-mer's canonical key matches a
+    // target k-mer's — which holds for both forward and reverse-complement
+    // copies (catches inverted paralogs); only the suffix-start position is kept.
+    let mut keep = in_target; // reuse the target bitmap as the base
+    for_each_kmer(bases, k, |start, code| {
+        if halo.contains(&canonical_low_key(code, k)) {
+            keep[start] = true;
+        }
+    });
+
+    Ok(runs_from_bitmap(&keep, flank))
+}
+
+/// Extract maximal `true` runs from `keep`, extend each by `flank` on both sides
+/// (clamped to `[0, len]`), then sort and merge overlapping runs.
+fn runs_from_bitmap(keep: &[bool], flank: u64) -> Vec<(u64, u64)> {
+    let n = keep.len();
+    let mut runs: Vec<(u64, u64)> = Vec::new();
+    let mut i = 0usize;
+    while i < n {
+        if !keep[i] {
+            i += 1;
+            continue;
+        }
+        let s = i;
+        while i < n && keep[i] {
+            i += 1;
+        }
+        let (mut a, mut b) = (s as u64, i as u64);
+        if flank > 0 {
+            a = a.saturating_sub(flank);
+            b = b.saturating_add(flank).min(n as u64);
+        }
+        runs.push((a, b));
+    }
+    if flank == 0 || runs.len() < 2 {
+        return runs;
+    }
+    runs.sort_unstable();
+    let mut merged: Vec<(u64, u64)> = Vec::with_capacity(runs.len());
+    for (s, e) in runs {
+        match merged.last_mut() {
+            Some(last) if s <= last.1 => {
+                if e > last.1 {
+                    last.1 = e;
+                }
+            }
+            _ => merged.push((s, e)),
+        }
+    }
+    merged
+}
+
+/// Write the kept runs as one BED3 line per run.
+fn emit_bed3<W: Write>(out: &mut W, chrom: &str, runs: &[(u64, u64)]) -> Result<usize> {
+    for &(s, e) in runs {
+        writeln!(out, "{chrom}\t{s}\t{e}").map_err(io_err)?;
+    }
+    Ok(runs.len())
+}
+
+/// Write the kept runs as BED12 lines, packing up to `blocks_per_line` runs as
+/// blocks each. Each line's span is `[first_run_start, last_run_end)`; block `i`
+/// is `(blockSizes[i], blockStarts[i] = run.start - span_start)`.
+fn emit_bed12<W: Write>(
+    out: &mut W,
+    chrom: &str,
+    runs: &[(u64, u64)],
+    blocks_per_line: usize,
+) -> Result<usize> {
+    let mut lines = 0usize;
+    for chunk in runs.chunks(blocks_per_line.max(1)) {
+        let span_start = chunk[0].0;
+        let span_end = chunk[chunk.len() - 1].1;
+        let sizes: Vec<String> = chunk.iter().map(|&(s, e)| (e - s).to_string()).collect();
+        let starts: Vec<String> = chunk
+            .iter()
+            .map(|&(s, _)| (s - span_start).to_string())
+            .collect();
+        writeln!(
+            out,
+            "{chrom}\t{span_start}\t{span_end}\tkeep\t0\t+\t{span_start}\t{span_end}\t0\t{}\t{}\t{}",
+            chunk.len(),
+            sizes.join(","),
+            starts.join(","),
+        )
+        .map_err(io_err)?;
+        lines += 1;
+    }
+    Ok(lines)
+}
+
+fn io_err(source: std::io::Error) -> Error {
+    Error::Io {
+        path: std::path::PathBuf::from("<keepset output>"),
+        source,
+    }
+}
+
+/// Build a keep-set BED from a reference FASTA and a per-contig target BED,
+/// writing it to `out`. Returns summary statistics.
+pub fn build_keepset<W: Write>(
+    ref_fa: &Path,
+    target_bed: &Path,
+    opts: &KeepsetOptions,
+    out: &mut W,
+) -> Result<KeepsetStats> {
+    let (bases, contigs) = read_fasta_contigs(ref_fa)?;
+    let genome_len = bases.len() as u64;
+    let targets = parse_target_bed_to_flat(target_bed, &contigs)?;
+    let runs = compute_keep_runs(&bases, &targets, opts.k, opts.max_occ, opts.flank)?;
+    let kept_bp: u64 = runs.iter().map(|&(s, e)| e - s).sum();
+    let lines_written = match opts.format {
+        KeepsetFormat::Bed3 => emit_bed3(out, &opts.chrom_label, &runs)?,
+        KeepsetFormat::Bed12 => emit_bed12(out, &opts.chrom_label, &runs, opts.blocks_per_line)?,
+    };
+    Ok(KeepsetStats {
+        genome_len,
+        target_intervals: targets.len(),
+        kept_runs: runs.len(),
+        kept_bp,
+        lines_written,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::train::mask::{covered_by_bed, parse_bed};
+    use tempfile::NamedTempFile;
+
+    // Build a base vector from an ACGTN string (N -> sentinel).
+    fn bases(s: &str) -> Vec<u8> {
+        s.bytes()
+            .map(|b| match b {
+                b'A' => 0,
+                b'C' => 1,
+                b'G' => 2,
+                b'T' => 3,
+                _ => N_SENTINEL,
+            })
+            .collect()
+    }
+
+    fn covered(runs: &[(u64, u64)], p: u64) -> bool {
+        runs.iter().any(|&(s, e)| p >= s && p < e)
+    }
+
+    #[test]
+    fn targets_are_kept_whole() {
+        // Target [2,5) kept entirely with no homology halo. The target k-mers
+        // (AACG/ACGG/CGGT at starts 2/3/4) and their reverse complements do not
+        // recur elsewhere, so the immediate neighbors at 1 (TAAC) and 5 (GGTC)
+        // stay out. (A periodic sequence like ACGTACGT would make position 1 an
+        // RC homolog of a target k-mer and thus legitimately kept.)
+        let b = bases("TTAACGGTCC");
+        let runs = compute_keep_runs(&b, &[(2, 5)], 4, 0, 0).unwrap();
+        assert!(covered(&runs, 2) && covered(&runs, 4));
+        assert!(!covered(&runs, 1) && !covered(&runs, 5));
+    }
+
+    #[test]
+    fn off_target_homolog_start_is_kept() {
+        // Two exact copies of "ACGTAC" at 0 and 12, separated by a unique GGGGGG
+        // filler. Target is the single start position 0, so the only target
+        // k-mer is "ACGTAC"; k=6 -> the copy's START (12) is a forward homolog.
+        let b = bases("ACGTACGGGGGGACGTACGGGG");
+        let runs = compute_keep_runs(&b, &[(0, 1)], 6, 0, 0).unwrap();
+        assert!(covered(&runs, 0), "target start kept");
+        assert!(covered(&runs, 12), "homolog start at 12 must be kept");
+        // Mid-kmer position 13 ("CGTACG") is not a target k-mer start, so the
+        // start-only keep leaves it out (we do not keep the whole [p,p+k) span).
+        assert!(!covered(&runs, 13), "only the suffix-start is kept");
+        // The GGGGGG filler is neither target nor homolog.
+        assert!(!covered(&runs, 7), "unique filler not kept");
+    }
+
+    #[test]
+    fn reverse_complement_homolog_is_kept() {
+        // Target k-mer "AACCGG" (at 0); its reverse complement "CCGGTT" planted
+        // at 7, isolated from the target by an N so the rolling window cannot
+        // also surface "CCGGTT" as a *forward* target k-mer. The only way to keep
+        // position 7 is therefore the reverse-complement path: it exercises RC
+        // homology in isolation (regression for the low- vs high-aligned key
+        // mismatch that previously dropped RC homologs for k < 32).
+        // Layout: 0..6 = target "AACCGG", 6 = N, 7..13 = RC copy "CCGGTT".
+        let b = bases("AACCGGNCCGGTT");
+        let runs = compute_keep_runs(&b, &[(0, 1)], 6, 0, 0).unwrap();
+        assert!(covered(&runs, 0), "target start kept");
+        assert!(covered(&runs, 7), "reverse-complement homolog start kept");
+    }
+
+    #[test]
+    fn high_copy_kmer_dropped_by_cap() {
+        // "AC" repeated -> the 2-mer "AC" occurs many times. With cap=2 it is
+        // dropped, so only the target interval itself is kept (no homology halo).
+        let b = bases("ACACACACACACACAC");
+        let capped = compute_keep_runs(&b, &[(0, 2)], 2, 2, 0).unwrap();
+        let kept_bp: u64 = capped.iter().map(|&(s, e)| e - s).sum();
+        assert_eq!(kept_bp, 2, "over-cap homology dropped; only target kept");
+        // Uncapped: the homologous starts are kept too (more than 2 bp).
+        let uncapped = compute_keep_runs(&b, &[(0, 2)], 2, 0, 0).unwrap();
+        let kept_bp_u: u64 = uncapped.iter().map(|&(s, e)| e - s).sum();
+        assert!(kept_bp_u > 2, "uncapped keeps homologous AC starts");
+    }
+
+    #[test]
+    fn inverted_repeat_counts_toward_cap() {
+        // The target k-mer "AC" (at 0) occurs once forward, but its reverse
+        // complement "GT" recurs four times in the "GTGTGTGT" tail. Because the
+        // cap is applied on the strand-canonical key, the AC/GT family count is
+        // 1 + 4 = 5, so an inverted high-copy repeat cannot slip under the cap.
+        let b = bases("ACGTGTGTGT");
+        // cap = 3 < 5 -> the whole family is dropped; only the target is kept.
+        let capped = compute_keep_runs(&b, &[(0, 1)], 2, 3, 0).unwrap();
+        let kept_bp: u64 = capped.iter().map(|&(s, e)| e - s).sum();
+        assert_eq!(kept_bp, 1, "inverted high-copy repeat dropped by the cap");
+        // Uncapped, the four reverse-complement homolog starts are kept too.
+        let uncapped = compute_keep_runs(&b, &[(0, 1)], 2, 0, 0).unwrap();
+        let kept_bp_u: u64 = uncapped.iter().map(|&(s, e)| e - s).sum();
+        assert_eq!(
+            kept_bp_u, 5,
+            "uncapped keeps the RC homolog starts (0,2,4,6,8)"
+        );
+    }
+
+    #[test]
+    fn flank_extends_and_merges_runs() {
+        // Exercise the run-extraction + flank + merge logic directly on a
+        // bitmap (isolated from homology). Single kept position 5.
+        let mut keep = vec![false; 12];
+        keep[5] = true;
+        assert_eq!(runs_from_bitmap(&keep, 0), vec![(5, 6)]);
+        assert_eq!(
+            runs_from_bitmap(&keep, 1),
+            vec![(4, 7)],
+            "flank=1 widens to [4,7)"
+        );
+        // Two nearby positions whose flanks overlap must merge into one run.
+        let mut keep2 = vec![false; 12];
+        keep2[3] = true;
+        keep2[5] = true;
+        assert_eq!(
+            runs_from_bitmap(&keep2, 1),
+            vec![(2, 7)],
+            "overlapping flanks merge"
+        );
+        // Flank clamps at the genome bounds.
+        let mut keep3 = vec![false; 5];
+        keep3[0] = true;
+        keep3[4] = true;
+        assert_eq!(
+            runs_from_bitmap(&keep3, 10),
+            vec![(0, 5)],
+            "flank clamps to [0,len]"
+        );
+    }
+
+    #[test]
+    fn invalid_k_rejected() {
+        let b = bases("ACGT");
+        assert!(compute_keep_runs(&b, &[], 0, 0, 0).is_err());
+        assert!(compute_keep_runs(&b, &[], 33, 0, 0).is_err());
+    }
+
+    #[test]
+    fn bed3_and_bed12_decode_to_identical_keepset() {
+        // Emit the same runs as BED3 and BED12, parse both back through the
+        // builder's parse_bed, and assert identical covered positions.
+        let runs = vec![(10u64, 12u64), (20, 21), (100, 130)];
+        let mut f3 = NamedTempFile::new().unwrap();
+        emit_bed3(&mut f3, "genome", &runs).unwrap();
+        f3.flush().unwrap();
+        let mut f12 = NamedTempFile::new().unwrap();
+        emit_bed12(&mut f12, "genome", &runs, 2).unwrap();
+        f12.flush().unwrap();
+        let iv3 = parse_bed(f3.path()).unwrap();
+        let iv12 = parse_bed(f12.path()).unwrap();
+        for p in 0..140u64 {
+            assert_eq!(
+                covered_by_bed(&iv3, p),
+                covered_by_bed(&iv12, p),
+                "BED3 and BED12 disagree at flat position {p}"
+            );
+        }
+        // Spot-check the keep-set content itself.
+        assert!(covered_by_bed(&iv3, 10) && covered_by_bed(&iv3, 11));
+        assert!(!covered_by_bed(&iv3, 12));
+        assert!(covered_by_bed(&iv3, 20) && !covered_by_bed(&iv3, 21));
+        assert!(covered_by_bed(&iv3, 129) && !covered_by_bed(&iv3, 130));
+    }
+
+    #[test]
+    fn target_bed_maps_per_contig_to_flat() {
+        // Two contigs of length 10 and 8; a target on the second contig maps to
+        // flat offset 10 + local.
+        let contigs = vec![
+            Contig {
+                name: "chr1".into(),
+                offset: 0,
+                len: 10,
+            },
+            Contig {
+                name: "chr2".into(),
+                offset: 10,
+                len: 8,
+            },
+        ];
+        let mut bed = NamedTempFile::new().unwrap();
+        writeln!(bed, "chr2\t2\t5").unwrap();
+        bed.flush().unwrap();
+        let flat = parse_target_bed_to_flat(bed.path(), &contigs).unwrap();
+        assert_eq!(flat, vec![(12, 15)]);
+    }
+
+    #[test]
+    fn duplicate_fasta_contig_names_rejected() {
+        // Duplicate names would collapse in the name->offset map and misroute
+        // targets, so reading the reference must fail with InvalidInput.
+        let mut fa = NamedTempFile::new().unwrap();
+        writeln!(fa, ">chr1\nACGT\n>chr1\nTTTT").unwrap();
+        fa.flush().unwrap();
+        let err = read_fasta_contigs(fa.path()).unwrap_err();
+        assert!(
+            matches!(err, Error::InvalidInput { .. }),
+            "expected InvalidInput for duplicate contig name, got {err:?}"
+        );
+        // A unique-named reference still reads cleanly.
+        let mut ok = NamedTempFile::new().unwrap();
+        writeln!(ok, ">chr1\nACGT\n>chr2\nTTTT").unwrap();
+        ok.flush().unwrap();
+        let (bases, contigs) = read_fasta_contigs(ok.path()).unwrap();
+        assert_eq!(bases.len(), 8);
+        assert_eq!(contigs.len(), 2);
+    }
+
+    #[test]
+    fn canonical_low_key_unifies_strands() {
+        // A k-mer and its reverse complement share one canonical key.
+        let acgtac = 0b00_01_10_11_00_01u64; // A C G T A C, low-aligned, k=6
+        let rc = reverse_complement_low(acgtac, 6);
+        assert_eq!(canonical_low_key(acgtac, 6), canonical_low_key(rc, 6));
+        // Reverse complement is an involution on the low-aligned encoding.
+        assert_eq!(reverse_complement_low(rc, 6), acgtac);
+    }
+
+    #[test]
+    fn target_bed_rejects_unknown_chrom_and_overrun() {
+        let contigs = vec![Contig {
+            name: "chr1".into(),
+            offset: 0,
+            len: 10,
+        }];
+        let mut bad_chrom = NamedTempFile::new().unwrap();
+        writeln!(bad_chrom, "chrX\t0\t5").unwrap();
+        bad_chrom.flush().unwrap();
+        assert!(parse_target_bed_to_flat(bad_chrom.path(), &contigs).is_err());
+        let mut overrun = NamedTempFile::new().unwrap();
+        writeln!(overrun, "chr1\t0\t20").unwrap();
+        overrun.flush().unwrap();
+        assert!(parse_target_bed_to_flat(overrun.path(), &contigs).is_err());
+    }
+
+    #[test]
+    fn target_bed_requires_exact_bed3() {
+        // A wider record (BED6, BED12) must be rejected rather than silently
+        // collapsed to its outer [start, end) span, since the keep-set output is
+        // decoded block-precise by parse_bed.
+        let contigs = vec![Contig {
+            name: "chr1".into(),
+            offset: 0,
+            len: 100,
+        }];
+        // BED6: 6 columns -> rejected.
+        let mut bed6 = NamedTempFile::new().unwrap();
+        writeln!(bed6, "chr1\t0\t50\tname\t0\t+").unwrap();
+        bed6.flush().unwrap();
+        assert!(matches!(
+            parse_target_bed_to_flat(bed6.path(), &contigs),
+            Err(Error::InvalidInput { .. })
+        ));
+        // BED12: an exon-block transcript -> rejected (not widened to [10,90)).
+        let mut bed12 = NamedTempFile::new().unwrap();
+        writeln!(bed12, "chr1\t10\t90\ttx\t0\t+\t10\t90\t0\t2\t5,5,\t0,75,").unwrap();
+        bed12.flush().unwrap();
+        assert!(matches!(
+            parse_target_bed_to_flat(bed12.path(), &contigs),
+            Err(Error::InvalidInput { .. })
+        ));
+        // Exact BED3 still parses.
+        let mut bed3 = NamedTempFile::new().unwrap();
+        writeln!(bed3, "chr1\t10\t20").unwrap();
+        bed3.flush().unwrap();
+        assert_eq!(
+            parse_target_bed_to_flat(bed3.path(), &contigs).unwrap(),
+            vec![(10, 20)]
+        );
+    }
+
+    #[test]
+    fn compute_keep_runs_rejects_invalid_targets() {
+        // compute_keep_runs is public and returns Result, so malformed intervals
+        // must be rejected, not silently clipped (an inverted range would also
+        // panic the bitmap slice). Genome below is length 10.
+        let b = bases("ACGTACGTAC");
+        // end > genome length.
+        assert!(matches!(
+            compute_keep_runs(&b, &[(0, 20)], 4, 0, 0),
+            Err(Error::InvalidInput { .. })
+        ));
+        // end <= start.
+        assert!(matches!(
+            compute_keep_runs(&b, &[(5, 5)], 4, 0, 0),
+            Err(Error::InvalidInput { .. })
+        ));
+        assert!(matches!(
+            compute_keep_runs(&b, &[(8, 3)], 4, 0, 0),
+            Err(Error::InvalidInput { .. })
+        ));
+        // A valid in-range interval still succeeds.
+        assert!(compute_keep_runs(&b, &[(2, 5)], 4, 0, 0).is_ok());
+    }
+}
diff --git a/prmi/src/lib.rs b/prmi/src/lib.rs
index 89b49ea..4212238 100644
--- a/prmi/src/lib.rs
+++ b/prmi/src/lib.rs
@@ -14,6 +14,7 @@ pub mod fasta;
 pub mod histogram;
 pub mod index;
 pub mod inspect;
+pub mod keepset;
 pub mod pac;
 pub mod sa;
 pub mod sidecar;
diff --git a/prmi/src/train/mask.rs b/prmi/src/train/mask.rs
index 2c35050..02fa8d0 100644
--- a/prmi/src/train/mask.rs
+++ b/prmi/src/train/mask.rs
@@ -53,7 +53,7 @@ pub struct BedInterval {
     pub end: u64,
 }
 
-/// Parse a BED file into a sorted list of `BedInterval`s.
+/// Parse a BED file into a sorted, merged list of `BedInterval`s.
 ///
 /// Comment lines (`#`), track/browser header lines, and blank lines are
 /// silently skipped. Requires at least 3 whitespace-separated columns
@@ -61,10 +61,22 @@ pub struct BedInterval {
 /// intervals are concatenated genome-wide, matching the flat-genome
 /// coordinate space used by the trainer.
 ///
-/// Returns an error if any data line has fewer than 3 columns, or if
-/// start/end are not valid integers, or if `end <= start`.
+/// Per-line BED3 vs BED12 (decided by column count):
+/// - **BED3** (3–11 columns): keep the whole `[start, end)` interval. This is
+///   the canonical target / coarse-homology shape and is unchanged.
+/// - **BED12** (≥12 columns): keep ONLY the blocks. Columns 10/11/12 are
+///   `blockCount` / comma-separated `blockSizes` / comma-separated
+///   `blockStarts` (offsets relative to `start`); block `i` contributes
+///   `[start + blockStarts[i], start + blockStarts[i] + blockSizes[i])`. This
+///   lets the keep-set encode position-precise homology (e.g. length-1 / short
+///   contiguous-run k-mer start positions) compactly, one BED line per region.
 ///
-/// The returned vector is sorted by `start`.
+/// Returns an error if any data line has fewer than 3 columns, if start/end or
+/// any block field is not a valid integer, if `end <= start`, or if a BED12
+/// line's blocks are malformed (count mismatch, zero-length, or extending past
+/// `end`).
+///
+/// The returned vector is sorted by `start` and overlapping intervals merged.
 pub fn parse_bed(path: &Path) -> Result<Vec<BedInterval>> {
     let text = std::fs::read_to_string(path).map_err(|source| Error::Io {
         path: path.to_path_buf(),
@@ -114,7 +126,13 @@ pub fn parse_bed(path: &Path) -> Result<Vec<BedInterval>> {
                 ),
             });
         }
-        out.push(BedInterval { start, end });
+        // A 12+-column line carries BED12 block fields (cols 10/11/12): keep
+        // only the blocks. Anything narrower is BED3: keep the whole interval.
+        if cols.len() >= 12 {
+            push_bed12_blocks(&mut out, &cols, start, end, lineno)?;
+        } else {
+            out.push(BedInterval { start, end });
+        }
     }
     out.sort_by_key(|i| i.start);
 
@@ -141,6 +159,105 @@ pub fn parse_bed(path: &Path) -> Result<Vec<BedInterval>> {
     Ok(out)
 }
 
+/// Parse the BED12 block fields of a single line and append one `BedInterval`
+/// per block to `out`.
+///
+/// `cols` is the whitespace-split line (already known to have ≥12 columns);
+/// `start`/`end` are the line's chromStart/chromEnd. Per the BED12 spec, column
+/// 10 (`cols[9]`) is `blockCount`, column 11 (`cols[10]`) is comma-separated
+/// `blockSizes`, and column 12 (`cols[11]`) is comma-separated `blockStarts`
+/// (offsets relative to `start`). A trailing comma (UCSC-style) is tolerated.
+/// Block `i` contributes `[start + starts[i], start + starts[i] + sizes[i])`.
+///
+/// Errors (all `InvalidInput`, with the 1-based line number) on a non-integer
+/// field, `blockCount == 0`, a size/start-count mismatch, a zero-length block,
+/// or a block extending past `end`.
+fn push_bed12_blocks(
+    out: &mut Vec<BedInterval>,
+    cols: &[&str],
+    start: u64,
+    end: u64,
+    lineno: usize,
+) -> Result<()> {
+    let err = |detail: String| Error::InvalidInput { detail };
+    let block_count: usize = cols[9].parse().map_err(|_| {
+        err(format!(
+            "BED parse error at line {}: bad blockCount {:?}",
+            lineno + 1,
+            cols[9]
+        ))
+    })?;
+    if block_count == 0 {
+        return Err(err(format!(
+            "BED parse error at line {}: blockCount is 0 (a BED12 line must have >=1 block)",
+            lineno + 1
+        )));
+    }
+    // Split a comma-separated u64 list, tolerating a single trailing comma.
+    let split_u64 = |field: &str, what: &str| -> Result<Vec<u64>> {
+        field
+            .split(',')
+            .filter(|s| !s.is_empty())
+            .map(|s| {
+                s.parse::<u64>().map_err(|_| {
+                    err(format!(
+                        "BED parse error at line {}: bad {} entry {:?}",
+                        lineno + 1,
+                        what,
+                        s
+                    ))
+                })
+            })
+            .collect()
+    };
+    let sizes = split_u64(cols[10], "blockSizes")?;
+    let starts = split_u64(cols[11], "blockStarts")?;
+    if sizes.len() != block_count || starts.len() != block_count {
+        return Err(err(format!(
+            "BED parse error at line {}: blockCount {} but got {} blockSizes and {} blockStarts",
+            lineno + 1,
+            block_count,
+            sizes.len(),
+            starts.len()
+        )));
+    }
+    for i in 0..block_count {
+        if sizes[i] == 0 {
+            return Err(err(format!(
+                "BED parse error at line {}: block {} has zero length",
+                lineno + 1,
+                i
+            )));
+        }
+        // Checked arithmetic: a malformed line with huge blockStart/blockSize
+        // could otherwise wrap and slip past the `be > end` bound check below.
+        let bs = start.checked_add(starts[i]);
+        let be = bs.and_then(|bs| bs.checked_add(sizes[i]));
+        let (bs, be) = match (bs, be) {
+            (Some(bs), Some(be)) => (bs, be),
+            _ => {
+                return Err(err(format!(
+                    "BED parse error at line {}: block {} coordinates overflow",
+                    lineno + 1,
+                    i
+                )))
+            }
+        };
+        if be > end {
+            return Err(err(format!(
+                "BED parse error at line {}: block {} [{}, {}) extends past chromEnd {}",
+                lineno + 1,
+                i,
+                bs,
+                be,
+                end
+            )));
+        }
+        out.push(BedInterval { start: bs, end: be });
+    }
+    Ok(())
+}
+
 /// Decide whether a doubled-text suffix-array position should be RETAINED by a
 /// tiered keep-mask (Design Z).
 ///
@@ -387,6 +504,123 @@ mod tests {
         );
     }
 
+    // --- BED12 block parsing -------------------------------------------------
+
+    #[test]
+    fn parse_bed12_keeps_only_blocks_not_whole_span() {
+        // Line spans [100, 200) but declares 2 blocks: [100,110) and [150,170).
+        // BED12 keeps ONLY the blocks; the [110,150) and [170,200) gaps are out.
+        let mut f = NamedTempFile::new().unwrap();
+        writeln!(f, "chr1\t100\t200\tr\t0\t+\t100\t200\t0\t2\t10,20\t0,50").unwrap();
+        let intervals = parse_bed(f.path()).unwrap();
+        assert_eq!(
+            intervals.len(),
+            2,
+            "two disjoint blocks, not the whole span"
+        );
+        assert_eq!((intervals[0].start, intervals[0].end), (100, 110));
+        assert_eq!((intervals[1].start, intervals[1].end), (150, 170));
+        assert!(covered_by_bed(&intervals, 100));
+        assert!(covered_by_bed(&intervals, 109));
+        assert!(!covered_by_bed(&intervals, 110), "block1 end is exclusive");
+        assert!(!covered_by_bed(&intervals, 120), "inter-block gap not kept");
+        assert!(covered_by_bed(&intervals, 150));
+        assert!(covered_by_bed(&intervals, 169));
+        assert!(!covered_by_bed(&intervals, 170), "block2 end is exclusive");
+        assert!(
+            !covered_by_bed(&intervals, 199),
+            "span tail past last block not kept"
+        );
+    }
+
+    #[test]
+    fn parse_bed12_tolerates_trailing_comma() {
+        // UCSC writes blockSizes/blockStarts with a trailing comma.
+        let mut f = NamedTempFile::new().unwrap();
+        writeln!(f, "chr1\t100\t200\tr\t0\t+\t100\t200\t0\t2\t10,20,\t0,50,").unwrap();
+        let intervals = parse_bed(f.path()).unwrap();
+        assert_eq!(intervals.len(), 2);
+        assert_eq!((intervals[0].start, intervals[0].end), (100, 110));
+        assert_eq!((intervals[1].start, intervals[1].end), (150, 170));
+    }
+
+    #[test]
+    fn parse_bed12_length1_blocks_are_position_precise() {
+        // The k-mer-homology generator's shape: single-position blocks marking
+        // exact suffix-start positions. Two length-1 blocks at offsets 0 and 5.
+        let mut f = NamedTempFile::new().unwrap();
+        writeln!(f, "chr1\t1000\t1010\tr\t0\t+\t1000\t1010\t0\t2\t1,1\t0,5").unwrap();
+        let intervals = parse_bed(f.path()).unwrap();
+        assert_eq!(intervals.len(), 2);
+        assert_eq!((intervals[0].start, intervals[0].end), (1000, 1001));
+        assert_eq!((intervals[1].start, intervals[1].end), (1005, 1006));
+        assert!(covered_by_bed(&intervals, 1000));
+        assert!(!covered_by_bed(&intervals, 1001));
+        assert!(covered_by_bed(&intervals, 1005));
+        assert!(!covered_by_bed(&intervals, 1004));
+    }
+
+    #[test]
+    fn parse_bed_mixed_bed3_and_bed12_lines() {
+        // BED3 line kept whole; BED12 line kept by blocks; both coexist + merge.
+        let mut f = NamedTempFile::new().unwrap();
+        writeln!(f, "chr1\t0\t50").unwrap();
+        writeln!(f, "chr1\t100\t200\tr\t0\t+\t100\t200\t0\t1\t10\t0").unwrap();
+        let intervals = parse_bed(f.path()).unwrap();
+        assert_eq!(intervals.len(), 2);
+        assert_eq!((intervals[0].start, intervals[0].end), (0, 50));
+        assert_eq!((intervals[1].start, intervals[1].end), (100, 110));
+    }
+
+    #[test]
+    fn parse_bed12_rejects_block_count_mismatch() {
+        let mut f = NamedTempFile::new().unwrap();
+        // blockCount=2 but only one size/start each.
+        writeln!(f, "chr1\t100\t200\tr\t0\t+\t100\t200\t0\t2\t10\t0").unwrap();
+        let err = parse_bed(f.path()).unwrap_err();
+        assert!(format!("{err}").contains("blockCount"));
+    }
+
+    #[test]
+    fn parse_bed12_rejects_block_past_chrom_end() {
+        let mut f = NamedTempFile::new().unwrap();
+        // Block [100, 260) extends past chromEnd 200.
+        writeln!(f, "chr1\t100\t200\tr\t0\t+\t100\t200\t0\t1\t160\t0").unwrap();
+        let err = parse_bed(f.path()).unwrap_err();
+        assert!(format!("{err}").contains("extends past chromEnd"));
+    }
+
+    #[test]
+    fn parse_bed12_rejects_zero_block_count() {
+        let mut f = NamedTempFile::new().unwrap();
+        // 12 columns so the BED12 branch is taken, but blockCount is 0.
+        writeln!(f, "chr1\t100\t200\tr\t0\t+\t100\t200\t0\t0\t1\t0").unwrap();
+        let err = parse_bed(f.path()).unwrap_err();
+        assert!(format!("{err}").contains("blockCount is 0"));
+    }
+
+    #[test]
+    fn parse_bed12_rejects_overflowing_block_coords() {
+        let mut f = NamedTempFile::new().unwrap();
+        // blockStart near u64::MAX would wrap start+blockStart; must be rejected
+        // rather than slipping past the chromEnd bound via wraparound.
+        writeln!(
+            f,
+            "chr1\t100\t200\tr\t0\t+\t100\t200\t0\t1\t10\t18446744073709551610"
+        )
+        .unwrap();
+        let err = parse_bed(f.path()).unwrap_err();
+        assert!(format!("{err}").contains("overflow"));
+    }
+
+    #[test]
+    fn parse_bed12_rejects_zero_length_block() {
+        let mut f = NamedTempFile::new().unwrap();
+        writeln!(f, "chr1\t100\t200\tr\t0\t+\t100\t200\t0\t1\t0\t0").unwrap();
+        let err = parse_bed(f.path()).unwrap_err();
+        assert!(format!("{err}").contains("zero length"));
+    }
+
     // --- covered_by_bed ------------------------------------------------------
 
     #[test]