"""Cluster-bootstrap-over-papers 95% CIs for the pairwise-accuracy (Mann-Whitney
AUC) cells in the conference study.

Each quality proxy splits the papers into two sides: a high-quality group and a
low-quality group. The AUC is the pairwise accuracy of that split, the
probability that the system puts more comments on a randomly drawn low-quality
paper than on a randomly drawn high-quality one (0.5 means no separation).

Resampling unit = paper, stratified by (proxy, side): within each proxy we
resample the high-side papers and the low-side papers with replacement and take
2.5/97.5 percentiles. No significance tests.

Paper sets, result dirs, and the tables to print are defined in a JSON config
(--config, defaults to ci_auc_tables.json). The config and the result JSONs and
manifests it points at are kept locally (gitignored, large); the math is covered
by tests/test_ci_auc.py on in-memory data. Three table kinds, one per paper
format: "comment_volume" (Table 1, mean comments + delta + overall AUC),
"by_proxy" (Table 2, accuracy per quality proxy), and "by_severity" (Table 9,
accuracy per severity tier).
"""
import sys
from pathlib import Path
from collections import defaultdict
import numpy as np

HERE = Path(__file__).resolve().parent
# compute_auc.py sits next to this file. Put this directory on sys.path so the
# import below also works when the module is imported from another working
# directory (it is a no-op when run as a script from here).
if str(HERE) not in sys.path:
    sys.path.insert(0, str(HERE))
from compute_auc import load_manifest, load_counts, cell_summary, auc_from  # noqa: E402

# Defaults for the bootstrap: number of draws and the shared random stream.
# Both can be overridden per call via the n_boot / rng keyword arguments.
B = 5000
RNG = np.random.default_rng(42)


def by_proxy_totals(counts_for_cell, slug_to_mem):
    """Group one cell's total comment counts by quality proxy and side.

    counts_for_cell is {slug: rec} and slug_to_mem is {slug: [membership]},
    where each membership has a "pair" (proxy id) and a "side" ("high"/"low").
    Returns proxy_id -> {'high': [counts], 'low': [counts]} (total comments).
    Papers without a manifest entry are ignored.
    """
    by_proxy = defaultdict(lambda: {"high": [], "low": []})
    for slug, rec in counts_for_cell.items():
        for membership in slug_to_mem.get(slug, []):
            # "pair" is the manifest's field name for a quality proxy.
            by_proxy[membership["pair"]][membership["side"]].append(rec["total"])
    return by_proxy


def cell_aucs(by_proxy):
    """Return (overall_auc, {proxy: auc}).

    The overall AUC pools hits and pairs across proxies (it is not the mean of
    the per-proxy AUCs). It is nan when no proxy contributes any pair.
    """
    per_proxy = {}
    hits_sum = pairs_sum = 0.0
    for proxy_id, sides in by_proxy.items():
        auc, hits, pairs = auc_from(sides["high"], sides["low"])
        per_proxy[proxy_id] = auc
        hits_sum += hits
        pairs_sum += pairs
    return (hits_sum / pairs_sum if pairs_sum else np.nan), per_proxy


def _percentile_ci(draws):
    """2.5/97.5 percentile interval of `draws`, or (nan, nan) if there are none."""
    if len(draws) == 0:
        return (float("nan"), float("nan"))
    return tuple(np.percentile(draws, [2.5, 97.5]))


def bootstrap(by_proxy, *, n_boot=None, rng=None):
    """Cluster-bootstrap CIs for one cell's overall and per-proxy AUC.

    Each draw resamples papers with replacement, stratified by (proxy, side):
    within every proxy the high-side and low-side papers are resampled on their
    own. Per draw, each proxy's resampled AUC is recorded, and hits and pairs
    are pooled across proxies for that draw's overall AUC.

    n_boot is the number of draws (default: module-level B) and rng the numpy
    Generator to draw from (default: module-level RNG, whose state is shared
    across calls; pass a fresh Generator for results independent of call order).

    Returns ((overall_lo, overall_hi), {proxy_id: (lo, hi)}) from the 2.5/97.5
    percentiles of those draws.
    """
    n_boot = B if n_boot is None else n_boot
    rng = RNG if rng is None else rng
    proxy_ids = sorted(by_proxy)
    arrs = {
        proxy_id: (np.asarray(by_proxy[proxy_id]["high"]),
                   np.asarray(by_proxy[proxy_id]["low"]))
        for proxy_id in proxy_ids
    }
    overalls = []
    per_lists = defaultdict(list)
    for _ in range(n_boot):
        hits_sum = pairs_sum = 0.0
        for proxy_id in proxy_ids:
            high, low = arrs[proxy_id]
            if high.size == 0 or low.size == 0:
                continue
            high_resampled = high[rng.integers(0, high.size, high.size)]
            low_resampled = low[rng.integers(0, low.size, low.size)]
            auc, hits, pairs = auc_from(high_resampled, low_resampled)
            per_lists[proxy_id].append(auc)
            hits_sum += hits
            pairs_sum += pairs
        overalls.append(hits_sum / pairs_sum if pairs_sum else np.nan)
    # A proxy with papers on only one side never gets a draw appended, so its
    # interval is (nan, nan), mirroring the nan the point-estimate paths return.
    return (_percentile_ci(overalls),
            {proxy_id: _percentile_ci(per_lists[proxy_id]) for proxy_id in proxy_ids})


# Proxy id (the manifest's "pair" field) -> column header in the by_proxy table.
PROXY = {1: "Community", 2: "Conference", 3: "Reviewer", 4: "Composite"}


# ---- per-severity-tier AUCs (appendix tier tables) ----

def by_proxy_recs(counts_for_cell, slug_to_mem):
    """proxy_id -> {'high': [rec], 'low': [rec]}.

    A rec is one paper's comment counts from load_counts:
    {'total', 'major', 'moderate', 'minor'}. Keeps the whole rec (not just the
    total) so per-severity-tier AUCs can be computed."""
    by_proxy = defaultdict(lambda: {"high": [], "low": []})
    for slug, rec in counts_for_cell.items():
        for membership in slug_to_mem.get(slug, []):
            by_proxy[membership["pair"]][membership["side"]].append(rec)
    return by_proxy


def bootstrap_tiers(by_proxy, tiers, *, n_boot=None, rng=None):
    """Cluster-bootstrap CI per tier. Returns {tier: (lo, hi)}.

    'tiers' may include 'total' (the overall comment count) alongside the
    severity tiers. One paper resample per (proxy, side) is shared across all
    tiers within a draw, so the tiers stay correlated. Supplies the CIs for the
    volume table (overall) and the tier table (overall + per-severity).

    n_boot and rng default to the module-level B and RNG (see bootstrap). A
    tier whose draws are all nan (no proxy has papers on both sides) yields a
    (nan, nan) interval.
    """
    n_boot = B if n_boot is None else n_boot
    rng = RNG if rng is None else rng
    tiers = list(tiers)

    # Build the per-tier count arrays once; the draw loop then only indexes
    # them. Proxies with an empty side contribute no pairs and are dropped here.
    strata = []
    for proxy_id in sorted(by_proxy):
        high = by_proxy[proxy_id]["high"]
        low = by_proxy[proxy_id]["low"]
        if not high or not low:
            continue
        strata.append((
            len(high),
            len(low),
            {t: np.asarray([rec[t] for rec in high]) for t in tiers},
            {t: np.asarray([rec[t] for rec in low]) for t in tiers},
        ))

    draws = {t: [] for t in tiers}
    for _ in range(n_boot):
        hits_sum = {t: 0.0 for t in tiers}
        pairs_sum = {t: 0.0 for t in tiers}
        for n_high, n_low, high_by_tier, low_by_tier in strata:
            # One index draw per side, reused for every tier.
            high_idx = rng.integers(0, n_high, n_high)
            low_idx = rng.integers(0, n_low, n_low)
            for t in tiers:
                _, hits, pairs = auc_from(high_by_tier[t][high_idx],
                                          low_by_tier[t][low_idx])
                hits_sum[t] += hits
                pairs_sum[t] += pairs
        for t in tiers:
            draws[t].append(hits_sum[t] / pairs_sum[t] if pairs_sum[t] else np.nan)
    return {t: _percentile_ci(draws[t]) for t in tiers}


def auc_cell(point, lo_hi):
    """Format one AUC point estimate with its CI as 'point [lo, hi]'."""
    lo, hi = lo_hi
    return f"{point:.2f} [{lo:.2f}, {hi:.2f}]"


def _labels_and_width(cells):
    """Row labels ('method__model') and the first-column width to align them."""
    labels = [f"{method}__{model}" for method, model in cells]
    return labels, max([len("method__model")] + [len(x) for x in labels]) + 2


def _load_cells(manifest, dirs, cells):
    """Load manifest memberships and comment counts, and lay out the row labels."""
    slug_to_mem = load_manifest(manifest)
    counts = load_counts(dirs, set(slug_to_mem))
    labels, width = _labels_and_width(cells)
    return slug_to_mem, counts, labels, width


def _rows_with_data(cells, labels, width, counts, slug_to_mem):
    """Yield (label, cell_counts, summary) per cell; print NO DATA for the rest.

    A cell has no data when it has no counts at all, or when cell_summary
    returns an empty dict because no proxy has papers on both sides.
    """
    for (method, model), label in zip(cells, labels):
        cell_counts = counts.get((method, model))
        summary = cell_summary(cell_counts, slug_to_mem) if cell_counts else {}
        if not summary:
            print(f"{label:<{width}}NO DATA")
            continue
        yield label, cell_counts, summary


def run_comment_volume(manifest, dirs, cells):
    """Table 1 (model-aggregate): mean comments on the high/low-quality groups,
    their difference (delta, % increase), and overall AUC with CI."""
    slug_to_mem, counts, labels, w = _load_cells(manifest, dirs, cells)
    print(f"{'method__model':<{w}}{'c_high':>8}{'c_low':>8}{'delta':>8}{'%inc':>8} {'Overall':<18}".rstrip())
    for label, cc, s in _rows_with_data(cells, labels, w, counts, slug_to_mem):
        overall_ci = bootstrap_tiers(by_proxy_recs(cc, slug_to_mem), ["total"])["total"]
        row = (f"{label:<{w}}{s['c_high']:>8.2f}{s['c_low']:>8.2f}{s['delta']:>+8.2f}"
               f"{s['pct_increase']:>7.1f}% {auc_cell(s['auc_overall'], overall_ci)}")
        print(row.rstrip())


def run_by_proxy(manifest, dirs, cells):
    """Table 2 (system-deltas): mean comments and per-quality-proxy AUC plus
    overall, with CIs, one (method, model) per row."""
    slug_to_mem, counts, labels, w = _load_cells(manifest, dirs, cells)
    proxy_ids = sorted(PROXY)  # Community / Conference / Reviewer / Composite
    head = (f"{'method__model':<{w}}{'c_bar':>8} "
            + "".join(f"{PROXY[p]:<18}" for p in proxy_ids) + f"{'Overall':<18}")
    print(head.rstrip())
    for label, cc, s in _rows_with_data(cells, labels, w, counts, slug_to_mem):
        by_proxy = by_proxy_totals(cc, slug_to_mem)
        overall, per = cell_aucs(by_proxy)
        (olo, ohi), perci = bootstrap(by_proxy)
        c_bar = (s["c_high"] + s["c_low"]) / 2
        row = f"{label:<{w}}{c_bar:>8.2f} "
        # A proxy absent from this cell's manifest memberships prints as '-'.
        row += "".join(f"{(auc_cell(per[p], perci[p]) if p in per else '-'):<18}" for p in proxy_ids)
        row += f"{auc_cell(overall, (olo, ohi)):<18}"
        print(row.rstrip())


def run_by_severity(manifest, dirs, cells):
    """Table 9 (severity-aggregate): mean comments on the low-quality group and
    per-severity-tier AUC (Major / Moderate / Minor) plus overall, with CIs."""
    slug_to_mem, counts, labels, w = _load_cells(manifest, dirs, cells)
    # (column header, cell_summary point key, bootstrap tier key)
    TIER_COLS = [("Major", "auc_major", "major"), ("Moderate", "auc_moderate", "moderate"),
                 ("Minor", "auc_minor", "minor"), ("Overall", "auc_overall", "total")]
    print((f"{'method__model':<{w}}{'c_low':>8} "
           + "".join(f"{name:<18}" for name, _, _ in TIER_COLS)).rstrip())
    for label, cc, s in _rows_with_data(cells, labels, w, counts, slug_to_mem):
        ci = bootstrap_tiers(by_proxy_recs(cc, slug_to_mem), [t for _, _, t in TIER_COLS])
        row = f"{label:<{w}}{s['c_low']:>8.2f} "
        row += "".join(f"{auc_cell(s[pkey], ci[tkey]):<18}" for _, pkey, tkey in TIER_COLS)
        print(row.rstrip())


# Table kind -> the report function that prints it. cells come from the config.
# Each kind maps to one paper-table format: comment_volume = Table 1
# (model-aggregate), by_proxy = Table 2 (system-deltas), by_severity = Table 9
# (severity-aggregate).
DISPATCH = {"comment_volume": run_comment_volume, "by_proxy": run_by_proxy, "by_severity": run_by_severity}


def run_cohort(config, name, base):
    """Run every table defined for one cohort. manifest and dirs in the config
    are resolved relative to `base` (the config file's directory).

    Raises KeyError if `name` is not a cohort in the config or a table's
    "kind" is not a key of DISPATCH."""
    cohort = config["cohorts"][name]
    manifest = base / cohort["manifest"]
    dirs = [base / d for d in cohort["dirs"]]
    for table in cohort["tables"]:
        print(f"\n########## {table['title']} ##########")
        cells = [tuple(c) for c in table["cells"]]
        DISPATCH[table["kind"]](manifest, dirs, cells)


def main(argv=None):
    """CLI entry point: read the JSON config and print one cohort's tables.

    argv defaults to sys.argv[1:]. Exits through argparse's error path when the
    config defines no cohorts or the requested cohort is unknown."""
    import argparse
    import json
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--config", type=Path, default=HERE / "ci_auc_tables.json",
                    help="JSON defining cohorts, result dirs, and tables. "
                         "Paths inside it are relative to the config's directory. "
                         "Defaults to ci_auc_tables.json.")
    ap.add_argument("--cohort", help="cohort name from the config "
                                     "(default: the first one defined)")
    args = ap.parse_args(argv)
    config = json.loads(args.config.read_text(encoding="utf-8"))
    if not config["cohorts"]:
        # next(iter({})) below would otherwise surface as a bare StopIteration.
        ap.error(f"no cohorts defined in {args.config}")
    cohort = args.cohort or next(iter(config["cohorts"]))
    if cohort not in config["cohorts"]:
        ap.error(f"unknown cohort {cohort!r}; choices: {list(config['cohorts'])}")
    run_cohort(config, cohort, args.config.resolve().parent)


if __name__ == "__main__":
    main()
def auc_from(highs, lows) -> tuple[float, float, int]:
    """Pairwise-accuracy AUC over the high x low outer product.

    Every (high, low) pair scores 1 when the low-side value is strictly larger
    than the high-side value, 0.5 when they tie, and 0 otherwise.

    Returns (auc, hits, total): hits is the summed score, total is
    len(highs) * len(lows), and auc is hits / total. When either side is
    empty there are no pairs and the result is (nan, 0.0, 0).
    """
    high = np.asarray(highs)
    low = np.asarray(lows)
    if high.size == 0 or low.size == 0:
        return float("nan"), 0.0, 0
    # Compare directly rather than subtracting: a difference wraps around for
    # unsigned integer inputs and would count reversed pairs as hits.
    low_col = low[:, None]
    high_row = high[None, :]
    wins = int((low_col > high_row).sum())
    ties = int((low_col == high_row).sum())
    hits = wins + 0.5 * ties
    total = int(high.size) * int(low.size)
    # Plain Python numbers, matching the annotated return type.
    return hits / total, float(hits), total
counts_for_cell is {slug: rec}.""" + # Group counts by (proxy, side). "pair" is the manifest's field name for a + # quality proxy. + by_proxy: dict[int, dict[str, dict[str, list[int]]]] = defaultdict( lambda: {"high": {"total": [], "major": [], "moderate": [], "minor": []}, "low": {"total": [], "major": [], "moderate": [], "minor": []}} ) @@ -129,7 +108,7 @@ def cell_summary( for m in slug_to_memberships.get(slug, []): side = m["side"] for k in ("total", "major", "moderate", "minor"): - by_pair[m["pair"]][side][k].append(rec[k]) + by_proxy[m["pair"]][side][k].append(rec[k]) # Δ and means across the four (or fewer) proxies highs_means: list[float] = [] @@ -137,68 +116,48 @@ def cell_summary( deltas: list[float] = [] auc_overall_hits = 0.0 auc_overall_total = 0 - auc_overall_wins = 0 - auc_tier: dict[str, list[float]] = {t: [] for t in SEVERITY_TIERS} auc_tier_hits: dict[str, float] = {t: 0.0 for t in SEVERITY_TIERS} auc_tier_total: dict[str, int] = {t: 0 for t in SEVERITY_TIERS} - auc_tier_wins: dict[str, int] = {t: 0 for t in SEVERITY_TIERS} - for pid in sorted(by_pair): - h_total = by_pair[pid]["high"]["total"] - l_total = by_pair[pid]["low"]["total"] - if not h_total or not l_total: + for proxy_id in sorted(by_proxy): + high_total = by_proxy[proxy_id]["high"]["total"] + low_total = by_proxy[proxy_id]["low"]["total"] + if not high_total or not low_total: continue - h_mean = mean(h_total) - l_mean = mean(l_total) - highs_means.append(h_mean) - lows_means.append(l_mean) - deltas.append(l_mean - h_mean) - # Overall pair AUC - _, w, t = pair_auc(h_total, l_total) - auc_overall_wins += w - auc_overall_total += t - # accumulate by counting hits directly to avoid float-mean drift - for cl in l_total: - for ch in h_total: - if cl > ch: - auc_overall_hits += 1 - elif cl == ch: - auc_overall_hits += 0.5 - # Per-tier AUCs + high_mean = mean(high_total) + low_mean = mean(low_total) + highs_means.append(high_mean) + lows_means.append(low_mean) + deltas.append(low_mean - 
high_mean) + _, hits, total = auc_from(high_total, low_total) + auc_overall_hits += hits + auc_overall_total += total for tier in SEVERITY_TIERS: - ht = by_pair[pid]["high"][tier] - lt = by_pair[pid]["low"][tier] - for cl in lt: - for ch in ht: - if cl > ch: - auc_tier_hits[tier] += 1 - auc_tier_wins[tier] += 1 - elif cl == ch: - auc_tier_hits[tier] += 0.5 - auc_tier_total[tier] += 1 + _, tier_hits, tier_total = auc_from(by_proxy[proxy_id]["high"][tier], + by_proxy[proxy_id]["low"][tier]) + auc_tier_hits[tier] += tier_hits + auc_tier_total[tier] += tier_total if not highs_means: return {} - h = mean(highs_means) - l = mean(lows_means) - d = mean(deltas) - pct = (d / h * 100) if h else float("nan") + c_high = mean(highs_means) + c_low = mean(lows_means) + delta = mean(deltas) + pct = (delta / c_high * 100) if c_high else float("nan") out = { "n_papers": len(counts_for_cell), - "c_high": h, - "c_low": l, - "delta": d, + "c_high": c_high, + "c_low": c_low, + "delta": delta, "pct_increase": pct, "auc_overall": auc_overall_hits / auc_overall_total if auc_overall_total else float("nan"), - "auc_overall_wins": auc_overall_wins, "auc_overall_total": auc_overall_total, } for tier in SEVERITY_TIERS: - tot = auc_tier_total[tier] - out[f"auc_{tier}"] = auc_tier_hits[tier] / tot if tot else float("nan") - out[f"auc_{tier}_wins"] = auc_tier_wins[tier] - out[f"auc_{tier}_total"] = tot + total = auc_tier_total[tier] + out[f"auc_{tier}"] = auc_tier_hits[tier] / total if total else float("nan") + out[f"auc_{tier}_total"] = total return out @@ -207,7 +166,7 @@ def render_markdown(rows: list[tuple[str, str, dict]]) -> str: lines = [] lines.append( "| method | model | n_papers | c_high | c_low | Δ | %inc | " - "AUC overall | AUC major | AUC mod | AUC minor | pairs |" + "AUC overall | AUC major | AUC moderate | AUC minor | pairs |" ) lines.append( "|---|---|---|---|---|---|---|---|---|---|---|---|" @@ -228,6 +187,8 @@ def render_markdown(rows: list[tuple[str, str, dict]]) -> str: def 
"""Unit tests for the conference-study AUC + bootstrap-CI helpers (ci_auc.py).

Uses tiny in-memory count tables (no result files), so the math is checkable by
hand: two high-quality papers with few comments and two low-quality papers with
more, giving perfect low-over-high separation (AUC = 1.0). Because every
resample of that data is also perfectly separated, the bootstrap intervals are
exactly (1.0, 1.0) and are asserted as such.
"""

import math
import sys
from pathlib import Path

_ANALYSES = Path(__file__).resolve().parents[1] / "benchmarks" / "conference_study" / "analyses"
if str(_ANALYSES) not in sys.path:
    sys.path.insert(0, str(_ANALYSES))

from ci_auc import (  # noqa: E402
    DISPATCH,
    auc_cell,
    auc_from,
    bootstrap,
    bootstrap_tiers,
    by_proxy_recs,
    by_proxy_totals,
    cell_aucs,
)
from compute_auc import cell_summary  # noqa: E402


# ---- auc_from: pairwise accuracy over the high x low outer product ----

def test_auc_from_perfect_separation():
    auc, hits, total = auc_from([1, 1], [2, 2])  # every low > every high
    assert auc == 1.0 and hits == 4.0 and total == 4


def test_auc_from_reversed():
    auc, _, _ = auc_from([2, 2], [1, 1])
    assert auc == 0.0


def test_auc_from_ties_get_half_credit():
    auc, hits, total = auc_from([1], [1])
    assert auc == 0.5 and hits == 0.5 and total == 1


def test_auc_from_empty_side_is_nan():
    auc, hits, total = auc_from([], [1, 2])
    assert math.isnan(auc) and hits == 0.0 and total == 0


# ---- cell_summary / cell_aucs / bootstrap on a toy cell ----

def _toy_cell():
    """One proxy: 2 high papers (2 comments each), 2 low papers (5 each)."""
    counts = {
        "p1": {"total": 2, "major": 1, "moderate": 1, "minor": 0},
        "p2": {"total": 2, "major": 1, "moderate": 1, "minor": 0},
        "p3": {"total": 5, "major": 2, "moderate": 2, "minor": 1},
        "p4": {"total": 5, "major": 2, "moderate": 2, "minor": 1},
    }
    mem = {
        "p1": [{"pair": 1, "side": "high"}], "p2": [{"pair": 1, "side": "high"}],
        "p3": [{"pair": 1, "side": "low"}], "p4": [{"pair": 1, "side": "low"}],
    }
    return counts, mem


def test_cell_summary_point_estimates():
    s = cell_summary(*_toy_cell())
    assert s["c_high"] == 2.0
    assert s["c_low"] == 5.0
    assert s["delta"] == 3.0
    assert s["auc_overall"] == 1.0  # low always exceeds high
    assert s["auc_major"] == 1.0


def test_cell_aucs_point_estimates():
    overall, per_proxy = cell_aucs(by_proxy_totals(*_toy_cell()))
    assert overall == 1.0
    assert per_proxy == {1: 1.0}


def test_bootstrap_tiers_ci_shape():
    ci = bootstrap_tiers(by_proxy_recs(*_toy_cell()), ("total", "major"))
    assert set(ci) == {"total", "major"}
    for tier in ("total", "major"):
        lo, hi = ci[tier]
        # Any resample of perfectly separated data is perfectly separated.
        assert lo == 1.0 and hi == 1.0


def test_bootstrap_overall_and_per_proxy():
    (lo, hi), per_proxy = bootstrap(by_proxy_totals(*_toy_cell()))
    assert (lo, hi) == (1.0, 1.0)
    assert set(per_proxy) == {1}  # CI present for the one proxy
    assert tuple(per_proxy[1]) == (1.0, 1.0)


def test_bootstrap_single_sided_proxy_no_crash():
    # proxy 2 has only a high-quality paper -> its per-proxy CI is nan, not a crash
    counts = {
        "p1": {"total": 2, "major": 1, "moderate": 1, "minor": 0},  # proxy 1 high
        "p2": {"total": 5, "major": 2, "moderate": 2, "minor": 1},  # proxy 1 low
        "p3": {"total": 3, "major": 1, "moderate": 1, "minor": 1},  # proxy 2 high only
    }
    mem = {
        "p1": [{"pair": 1, "side": "high"}], "p2": [{"pair": 1, "side": "low"}],
        "p3": [{"pair": 2, "side": "high"}],
    }
    (lo, hi), per_proxy = bootstrap(by_proxy_totals(counts, mem))
    assert math.isnan(per_proxy[2][0]) and math.isnan(per_proxy[2][1])
    # proxy 1 has one paper per side, so every draw reproduces AUC = 1.0 and the
    # one-sided proxy adds nothing to the pooled overall.
    assert tuple(per_proxy[1]) == (1.0, 1.0)
    assert (lo, hi) == (1.0, 1.0)


# ---- formatting / dispatch ----

def test_auc_cell_format():
    assert auc_cell(0.5, (0.25, 0.75)) == "0.50 [0.25, 0.75]"


def test_dispatch_covers_the_three_kinds():
    assert set(DISPATCH) == {"comment_volume", "by_proxy", "by_severity"}