From 1bf9d74fdebfb79cf1eaaec41392d1d7bcd0c644 Mon Sep 17 00:00:00 2001 From: Vasily Ilin Date: Wed, 1 Jul 2026 23:56:13 -0700 Subject: [PATCH 1/5] feat: add /check-golf bot verifying statements are unchanged Add scripts/check_golf.py and a check-golf GitHub workflow. When a `/check-golf` comment is posted on a PR, the bot compares the PR's base and head, checks that no declaration statement (signature/type) changed and only proofs/bodies changed, and upserts a single findings comment (posting it once, editing it on later runs). The Lean sources are parsed textually (no build): comments and whitespace are ignored, declarations are segmented on column-0 command starts, each statement is split from its body at the top-level `:=`, `where`, or equation `|` arm, and `by` proof terms embedded inside a type are treated as proof-irrelevant. Co-authored-by: Claude Opus 4.8 Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01Vjiv6nckrp5cJT9rpgZaKT --- .github/workflows/check-golf.yml | 72 ++++ scripts/README.md | 17 + scripts/check_golf.py | 627 +++++++++++++++++++++++++++++++ 3 files changed, 716 insertions(+) create mode 100644 .github/workflows/check-golf.yml create mode 100755 scripts/check_golf.py diff --git a/.github/workflows/check-golf.yml b/.github/workflows/check-golf.yml new file mode 100644 index 000000000..4b5866b39 --- /dev/null +++ b/.github/workflows/check-golf.yml @@ -0,0 +1,72 @@ +# Reacts to a `/check-golf` comment on a pull request. It verifies that the PR +# only golfs proofs -- i.e. that no declaration *statement* (signature/type) +# changed, only proofs and bodies -- and posts the findings as a single PR +# comment, editing that comment in place on subsequent runs. +# +# The check runs on `issue_comment`, so it uses the workflow file from the +# default branch and only parses the PR's Lean sources textually (it does not +# build them). + +name: Check golf + +on: + issue_comment: + types: [created] + +# Only the permissions needed to read the repo and upsert a PR comment. +permissions: + contents: read + pull-requests: write + +jobs: + check-golf: + name: Verify only proofs changed + # Run only for `/check-golf` comments posted on a pull request. + if: >- + github.event.issue.pull_request && + contains(github.event.comment.body, '/check-golf') + runs-on: ubuntu-latest + steps: + - name: Acknowledge the command + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMMENT_ID: ${{ github.event.comment.id }} + run: | + gh api --method POST \ + "repos/${{ github.repository }}/issues/comments/${COMMENT_ID}/reactions" \ + -f content=eyes || true + + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Resolve base and head revisions + id: refs + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR: ${{ github.event.issue.number }} + run: | + base_branch=$(gh pr view "$PR" --json baseRefName -q .baseRefName) + head_sha=$(gh pr view "$PR" --json headRefOid -q .headRefOid) + git fetch --no-tags origin "$base_branch" + git fetch --no-tags origin "pull/$PR/head:check-golf-head" + merge_base=$(git merge-base "origin/$base_branch" "$head_sha") + echo "base=$merge_base" >> "$GITHUB_OUTPUT" + echo "head=$head_sha" >> "$GITHUB_OUTPUT" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Run check-golf and upsert the report comment + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + python scripts/check_golf.py \ + --base "${{ steps.refs.outputs.base }}" \ + --head "${{ steps.refs.outputs.head }}" \ + --repo "${{ github.repository }}" \ + --pr "${{ github.event.issue.number }}" \ + --post diff --git a/scripts/README.md b/scripts/README.md index 3c389febc..8060c158d 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -34,3 +34,20 @@ This linter may need running a number of times. - `lake exe module_doc_lint` : Checks that module documentation is laid out according to a set standard. This does not check any file in the list `./scripts/MetaPrograms/module_doc_no_lint.txt`. Slowly we will empty this list of files. - `lake exe spelling` : Checks the spelling of words in Physlib against a given list of correctly spelled words which can be found in `./scripts/MetaPrograms/spellingWords.txt` + +## Checking golf pull requests + +- `scripts/check_golf.py` : Verifies that a pull request only *golfs* proofs, i.e. + that no declaration statement (its signature/type) changed and only proofs and + definition bodies changed. Comment `/check-golf` on a PR to run it via the + [`check-golf`](../.github/workflows/check-golf.yml) workflow; the bot posts its + findings as a single PR comment and edits that same comment on subsequent runs. + To run it locally against two revisions: + + ``` + scripts/check_golf.py --base --head + ``` + + It parses the changed Lean files textually (no build required): comments and + whitespace are ignored, and `by` proof terms embedded inside a type are treated + as proofs (proof-irrelevant). Anonymous instances and `example`s are not tracked. diff --git a/scripts/check_golf.py b/scripts/check_golf.py new file mode 100755 index 000000000..d85076676 --- /dev/null +++ b/scripts/check_golf.py @@ -0,0 +1,627 @@ +#!/usr/bin/env python3 +"""check_golf.py -- verify that a pull request only golfs proofs. + +A "golf" changes the *proofs* of lemmas/theorems (and the *bodies* of +definitions) without touching their *statements* (the signature: the name, +the binders and the type). This script compares two git revisions of the +repository and, for every declaration in the changed Lean files, decides +whether its statement was preserved and only the proof/body changed. + +It is meant to be driven from a GitHub workflow that reacts to a +``/check-golf`` comment on a pull request: it produces a Markdown report and, +when asked, upserts that report as a single PR comment (posting it the first +time, editing it on subsequent runs). + +The Lean side is parsed textually -- no build is required, which keeps the +check fast. The parser strips comments and string literals, tracks bracket +depth, and splits each declaration at the top-level ``:=`` (or ``where``) that +separates its statement from its body. That top-level split is what lets it +ignore ``:=`` inside default-valued binders and ``by`` tactic blocks that live +*inside* a type (e.g. ``⟨x, by omega⟩``). + +Usage (analysis / dry-run, prints Markdown to stdout): + + scripts/check_golf.py --base --head + +Usage (from CI, upserts the PR comment): + + scripts/check_golf.py --base --head \ + --repo owner/name --pr 1234 --post + +The GitHub token is read from ``$GITHUB_TOKEN`` (override with ``--token-env``). +""" + +from __future__ import annotations + +import argparse +import json +import os +import re +import subprocess +import sys +import urllib.request +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +# The hidden marker lets the workflow find the comment it previously posted so +# it can edit it in place instead of piling up duplicates. +COMMENT_MARKER = "" + +# Keywords that introduce a named declaration whose statement we want to track. +DECL_KEYWORDS = ( + "theorem", + "lemma", + "def", + "abbrev", + "instance", + "example", + "structure", + "inductive", + "class", + "opaque", +) + +# A declaration is a "proof" (statement + proof) versus a "definition" +# (statement + defining term). Golfing either preserves the statement. +PROOF_KEYWORDS = {"theorem", "lemma", "example"} + +# Modifiers that may sit between the start of a line and the declaration keyword +# (attributes ``@[...]`` and ``set_option ... in`` / ``open ... in`` prefixes are +# handled structurally, not through this set). +MODIFIERS = { + "private", + "protected", + "noncomputable", + "unsafe", + "partial", + "nonrec", + "scoped", + "local", + "mutual", +} + +# Keywords that open/close a namespacing scope. +SCOPE_KEYWORDS = {"namespace", "section", "end"} + +OPEN_BRACKETS = "([{⟨⦃" +CLOSE_BRACKETS = ")]}⟩⦄" + +# Characters that terminate the name token following a declaration keyword. +NAME_STOP = set(" \t\r\n({[⦃⟨:=") + + +# --------------------------------------------------------------------------- # +# Lean lexical pre-processing +# --------------------------------------------------------------------------- # +def code_view(text: str) -> str: + """Return ``text`` with comments and string literals blanked to spaces. + + Length and newline positions are preserved so indices computed on the + result line up with the original source. Block comments (``/- -/``) nest, + which also covers doc comments (``/-- -/``) and module docs (``/-! -/``). + """ + out: List[str] = [] + i, n = 0, len(text) + NORMAL, LINE, BLOCK, STR = 0, 1, 2, 3 + state = NORMAL + block_depth = 0 + while i < n: + c = text[i] + two = text[i:i + 2] + if state == NORMAL: + if two == "--": + out.append(" ") + i += 2 + state = LINE + continue + if two == "/-": + out.append(" ") + i += 2 + state = BLOCK + block_depth = 1 + continue + if c == '"': + out.append(" ") + i += 1 + state = STR + continue + out.append(c) + i += 1 + elif state == LINE: + if c == "\n": + out.append("\n") + state = NORMAL + else: + out.append(" ") + i += 1 + elif state == BLOCK: + if two == "/-": + out.append(" ") + i += 2 + block_depth += 1 + continue + if two == "-/": + out.append(" ") + i += 2 + block_depth -= 1 + if block_depth == 0: + state = NORMAL + continue + out.append("\n" if c == "\n" else " ") + i += 1 + else: # STR + if c == "\\": + out.append(" " if i + 1 < n else " ") + i += 2 + continue + if c == '"': + out.append(" ") + state = NORMAL + i += 1 + continue + out.append("\n" if c == "\n" else " ") + i += 1 + return "".join(out) + + +def normalize(fragment: str) -> str: + """Collapse whitespace so pure reformatting is not seen as a change.""" + return " ".join(fragment.split()) + + +def _ident_char(c: str) -> bool: + return c.isalnum() or c in "_.'" or ord(c) > 127 + + +def mask_by_blocks(sig_code: str) -> str: + """Blank the contents of ``by`` tactic blocks embedded inside a statement. + + A statement's *type* can embed proof terms -- ``⟨x, by omega⟩`` or an + argument such as ``(by decide)``. By proof irrelevance those tactic blocks + do not affect the elaborated type, so golfing them keeps the statement. + Each embedded ``by`` block runs to the end of its enclosing bracket group; + we replace it with a placeholder so signatures that differ only inside such + blocks compare equal. + """ + out: List[str] = [] + i, n = 0, len(sig_code) + depth = 0 + while i < n: + if (sig_code[i:i + 2] == "by" + and (i == 0 or not _ident_char(sig_code[i - 1])) + and (i + 2 >= n or not _ident_char(sig_code[i + 2]))): + d0 = depth + out.append("by<>") + i += 2 + while i < n: + c = sig_code[i] + if c in OPEN_BRACKETS: + depth += 1 + elif c in CLOSE_BRACKETS: + if depth == d0: + break # close bracket of the enclosing group + depth -= 1 + i += 1 + continue + c = sig_code[i] + if c in OPEN_BRACKETS: + depth += 1 + elif c in CLOSE_BRACKETS: + depth = max(0, depth - 1) + out.append(c) + i += 1 + return normalize("".join(out)) + + +# --------------------------------------------------------------------------- # +# Declaration extraction +# --------------------------------------------------------------------------- # +@dataclass +class Decl: + name: str + keyword: str + signature: str # normalized statement (keyword .. up to `:=`/`where`) + signature_masked: str # same, with embedded `by` proof blocks blanked + body: str # normalized defining term / proof + + @property + def is_proof(self) -> bool: + return self.keyword in PROOF_KEYWORDS + + +def _line_prefix(code: str, pos: int) -> str: + """The text on ``pos``'s line, up to ``pos`` (from the previous newline).""" + start = code.rfind("\n", 0, pos) + 1 + return code[start:pos] + + +_WHERE_RE = re.compile(r"(? List[int]: + """Indices of top-level command starts: non-blank characters in column 0. + + In Mathlib-style Lean, top-level commands (``lemma``, ``def``, + ``namespace``, ...) begin in column 0, while everything that belongs to a + command -- multi-line signatures, tactic blocks, term proofs -- is + indented. Segmenting on column-0 lines is therefore robust even when a + proof body contains brackets we cannot balance textually (e.g. a ``by`` + block closed by indentation rather than a ``)``). + """ + starts = [0] if code and not code[0].isspace() else [] + for i in range(1, len(code)): + if code[i - 1] == "\n" and not code[i].isspace() and code[i] != "\n": + starts.append(i) + return starts + + +def _read_token(code: str, pos: int, end: int) -> str: + j = pos + while j < end and code[j] not in NAME_STOP and code[j] != "@": + j += 1 + return code[pos:j] + + +def _strip_prefixes(code: str, start: int, end: int) -> int: + """Skip attributes / modifiers / ``... in`` prefixes; return keyword index.""" + pos = start + while pos < end: + while pos < end and code[pos] in " \t\r\n": + pos += 1 + if pos >= end: + break + if code[pos] == "@" and pos + 1 < end and code[pos + 1] == "[": + depth = 0 + while pos < end: + if code[pos] == "[": + depth += 1 + elif code[pos] == "]": + depth -= 1 + if depth == 0: + pos += 1 + break + pos += 1 + continue + tok = _read_token(code, pos, end) + if tok in MODIFIERS: + pos += len(tok) + continue + if tok in ("set_option", "open"): + m = re.compile(r"(? int: + """Index in ``[start, end)`` where the statement ends and the body begins. + + Bracket depth is tracked *locally* to this declaration. The body begins at + the first top-level ``:=`` or ``where``, or -- for equation-compiler + declarations that have neither -- at the first line-leading ``|`` arm + (recognised by a following top-level ``=>``, which distinguishes a real arm + from an absolute value ``|x|`` in a type). Returns ``end`` if none found. + """ + b_assign = b_where = b_bar = None + seen_bars: List[int] = [] + depth = 0 + i = start + while i < end: + c = code[i] + if depth == 0: + if b_assign is None and code[i:i + 2] == ":=": + b_assign = i + elif code[i:i + 2] == "=>" and seen_bars and b_bar is None: + b_bar = seen_bars[0] + elif b_where is None and _WHERE_RE.match(code, i): + b_where = i + if c == "|" and _line_prefix(code, i).strip() == "": + seen_bars.append(i) + if c in OPEN_BRACKETS: + depth += 1 + elif c in CLOSE_BRACKETS: + depth = max(0, depth - 1) + i += 1 + candidates = [b for b in (b_assign, b_where, b_bar) if b is not None] + return min(candidates) if candidates else end + + +def _apply_scope(stack: List[Tuple[str, str]], keyword: str, name: str) -> None: + if keyword == "namespace": + stack.append(("namespace", name)) + elif keyword == "section": + stack.append(("section", name)) + else: # end + if name: + for k in range(len(stack) - 1, -1, -1): + if stack[k][1] == name: + del stack[k:] + return + if stack: + stack.pop() + elif stack: + stack.pop() + + +def parse_decls(text: str) -> "Dict[str, Decl]": + """Parse ``text`` into a map from qualified name to :class:`Decl`.""" + code = code_view(text) + starts = _block_starts(code) + ns_stack: List[Tuple[str, str]] = [] + result: "Dict[str, Decl]" = {} + seen: Dict[str, int] = {} + + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(code) + kw_pos = _strip_prefixes(code, start, end) + keyword = _read_token(code, kw_pos, end) + + if keyword in SCOPE_KEYWORDS: + rest = code[kw_pos + len(keyword):end].split("\n", 1)[0].strip() + name = rest.split()[0] if rest else "" + _apply_scope(ns_stack, keyword, name) + continue + if keyword not in DECL_KEYWORDS: + continue + + # Parse the declaration name. + j = kw_pos + len(keyword) + while j < end and code[j] in " \t\r\n": + j += 1 + k = j + while k < end and code[k] not in NAME_STOP: + k += 1 + local = code[j:k] + if not local: + continue # anonymous instance / example -- cannot track by name + + boundary = _find_boundary(code, k, end) + sig_code = code[kw_pos:boundary] + signature = normalize(sig_code) + signature_masked = mask_by_blocks(sig_code) + body_start = boundary + (2 if code[boundary:boundary + 2] == ":=" else 0) + body = normalize(code[body_start:end]) + + prefix = ".".join(nm for kind, nm in ns_stack + if kind == "namespace" and nm) + qualified = (prefix + "." + local) if prefix else local + if qualified in seen: + seen[qualified] += 1 + qualified = "{}#{}".format(qualified, seen[qualified]) + else: + seen[qualified] = 0 + result[qualified] = Decl(qualified, keyword, signature, + signature_masked, body) + return result + + +# --------------------------------------------------------------------------- # +# Comparison +# --------------------------------------------------------------------------- # +@dataclass +class FileReport: + path: str + statement_changed: List[str] = field(default_factory=list) + proof_golfed: List[str] = field(default_factory=list) + embedded_proof_changed: List[str] = field(default_factory=list) + def_body_changed: List[str] = field(default_factory=list) + added: List[str] = field(default_factory=list) + removed: List[str] = field(default_factory=list) + + @property + def touched(self) -> bool: + return bool(self.statement_changed or self.proof_golfed + or self.embedded_proof_changed or self.def_body_changed + or self.added or self.removed) + + +def compare_file(path: str, base_src: Optional[str], + head_src: Optional[str]) -> FileReport: + rep = FileReport(path) + base = parse_decls(base_src) if base_src is not None else {} + head = parse_decls(head_src) if head_src is not None else {} + for name, d in head.items(): + if name not in base: + rep.added.append(name) + continue + b = base[name] + if b.signature == d.signature: + if b.body != d.body: + (rep.proof_golfed if d.is_proof + else rep.def_body_changed).append(name) + elif b.signature_masked == d.signature_masked: + # Only embedded proof terms differ: the type is unchanged. + rep.embedded_proof_changed.append(name) + else: + rep.statement_changed.append(name) + for name in base: + if name not in head: + rep.removed.append(name) + return rep + + +# --------------------------------------------------------------------------- # +# Git helpers +# --------------------------------------------------------------------------- # +def git(*args: str) -> str: + return subprocess.run(["git", *args], check=True, capture_output=True, + text=True).stdout + + +def git_maybe(*args: str) -> Optional[str]: + p = subprocess.run(["git", *args], capture_output=True, text=True) + return p.stdout if p.returncode == 0 else None + + +def changed_lean_files(base: str, head: str) -> List[str]: + out = git("diff", "--name-only", "--diff-filter=ACMRD", base, head) + return [f for f in out.splitlines() if f.endswith(".lean")] + + +def file_at(ref: str, path: str) -> Optional[str]: + return git_maybe("show", "{}:{}".format(ref, path)) + + +# --------------------------------------------------------------------------- # +# Report rendering +# --------------------------------------------------------------------------- # +def _bullets(names: List[str], path: str) -> str: + return "\n".join("- `{}` — `{}`".format(n, path) for n in sorted(names)) + + +def render_report(base: str, head: str, reports: List[FileReport]) -> str: + stmt = [(r.path, n) for r in reports for n in r.statement_changed] + golfed = [(r.path, n) for r in reports for n in r.proof_golfed] + embedded = [(r.path, n) for r in reports for n in r.embedded_proof_changed] + defbody = [(r.path, n) for r in reports for n in r.def_body_changed] + added = [(r.path, n) for r in reports for n in r.added] + removed = [(r.path, n) for r in reports for n in r.removed] + n_files = sum(1 for r in reports if r.touched) + + lines: List[str] = [COMMENT_MARKER, "## 🏌️ check-golf report", ""] + lines.append("Comparing base `{}` → head `{}` across **{}** changed Lean " + "file(s).".format(base[:9], head[:9], n_files)) + lines.append("") + if stmt: + lines.append("**Result: ❌ Statements changed.** {} declaration(s) " + "changed their statement — this is more than a golf. See " + "below.".format(len(stmt))) + elif golfed or embedded or defbody: + lines.append("**Result: ✅ Statements preserved.** Every changed " + "declaration kept its statement; only proofs / bodies " + "changed.") + else: + lines.append("**Result: ✅ No declaration statements changed.**") + lines.append("") + + lines.append("| Category | Count |") + lines.append("|---|---:|") + lines.append("| Proofs golfed (statement unchanged) | {} |".format(len(golfed))) + lines.append("| Statements changed | {} |".format(len(stmt))) + lines.append("| Embedded proofs golfed (type unchanged) | {} |".format(len(embedded))) + lines.append("| Definition bodies changed (type unchanged) | {} |".format(len(defbody))) + lines.append("| Declarations added | {} |".format(len(added))) + lines.append("| Declarations removed | {} |".format(len(removed))) + lines.append("") + + def section(title: str, items: List[Tuple[str, str]], open_: bool) -> None: + if not items: + return + lines.append("{} ({})\n".format( + " open" if open_ else "", title, len(items))) + for path, name in sorted(items): + lines.append("- `{}` — `{}`".format(name, path)) + lines.append("\n") + lines.append("") + + section("❌ Statements changed", stmt, open_=True) + section("✅ Proofs golfed", golfed, open_=False) + section("✅ Embedded proofs golfed (type unchanged)", embedded, open_=False) + section("🔧 Definition bodies changed (type unchanged)", defbody, open_=False) + section("➕ Declarations added", added, open_=False) + section("➖ Declarations removed", removed, open_=False) + + lines.append( + "Generated by scripts/check_golf.py · triggered by " + "/check-golf. Statements are compared textually: comments " + "and whitespace are ignored, and by proof terms embedded " + "in a type are treated as proofs (proof-irrelevant). Anonymous " + "instances and examples are not tracked (no stable " + "name).") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- # +# GitHub comment upsert +# --------------------------------------------------------------------------- # +def _api(method: str, url: str, token: str, + payload: Optional[dict] = None) -> dict: + data = json.dumps(payload).encode() if payload is not None else None + req = urllib.request.Request(url, data=data, method=method) + req.add_header("Authorization", "Bearer " + token) + req.add_header("Accept", "application/vnd.github+json") + req.add_header("X-GitHub-Api-Version", "2022-11-28") + if data is not None: + req.add_header("Content-Type", "application/json") + with urllib.request.urlopen(req) as resp: + raw = resp.read().decode() + return json.loads(raw) if raw else {} + + +def find_existing_comment(repo: str, pr: int, token: str) -> Optional[int]: + page = 1 + while True: + url = ("https://api.github.com/repos/{}/issues/{}/comments" + "?per_page=100&page={}".format(repo, pr, page)) + req = urllib.request.Request(url) + req.add_header("Authorization", "Bearer " + token) + req.add_header("Accept", "application/vnd.github+json") + with urllib.request.urlopen(req) as resp: + batch = json.loads(resp.read().decode()) + if not batch: + return None + for comment in batch: + if COMMENT_MARKER in (comment.get("body") or ""): + return comment["id"] + if len(batch) < 100: + return None + page += 1 + + +def upsert_comment(repo: str, pr: int, token: str, body: str) -> str: + existing = find_existing_comment(repo, pr, token) + if existing is not None: + _api("PATCH", + "https://api.github.com/repos/{}/issues/comments/{}".format( + repo, existing), token, {"body": body}) + return "edited comment {}".format(existing) + created = _api("POST", + "https://api.github.com/repos/{}/issues/{}/comments".format( + repo, pr), token, {"body": body}) + return "created comment {}".format(created.get("id")) + + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def build_reports(base: str, head: str) -> List[FileReport]: + reports = [] + for path in changed_lean_files(base, head): + rep = compare_file(path, file_at(base, path), file_at(head, path)) + if rep.touched: + reports.append(rep) + return reports + + +def main(argv: Optional[List[str]] = None) -> int: + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument("--base", required=True, help="base git ref (merge-base)") + ap.add_argument("--head", required=True, help="head git ref") + ap.add_argument("--repo", help="owner/name, required with --post") + ap.add_argument("--pr", type=int, help="PR number, required with --post") + ap.add_argument("--post", action="store_true", + help="upsert the report as a PR comment") + ap.add_argument("--token-env", default="GITHUB_TOKEN", + help="env var holding the GitHub token (default GITHUB_TOKEN)") + args = ap.parse_args(argv) + + reports = build_reports(args.base, args.head) + body = render_report(args.base, args.head, reports) + + if args.post: + if not (args.repo and args.pr): + ap.error("--post requires --repo and --pr") + token = os.environ.get(args.token_env, "") + if not token: + ap.error("no token in ${}".format(args.token_env)) + status = upsert_comment(args.repo, args.pr, token, body) + print(status, file=sys.stderr) + else: + print(body) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 985352cfb9f1389cfbae37261a49a64c8613fe85 Mon Sep 17 00:00:00 2001 From: Vasily Ilin Date: Fri, 3 Jul 2026 00:24:08 -0700 Subject: [PATCH 2/5] feat: break golfs down by trivial shape (newline removed, `;`-joins) The report now also counts, among statement-preserving changes: - proofs where only a newline was removed (identical tokens, fewer lines), which were previously not surfaced at all; and - proofs where tactics were crammed onto one line with a `;` (excluding the `<;>` combinator), with the number of joins introduced. Also relabel "definition bodies changed" to "definition values changed" and spell out in the footer that it means a def/instance/abbrev whose type is unchanged but whose defining term changed. Co-authored-by: Claude Opus 4.8 Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01Vjiv6nckrp5cJT9rpgZaKT --- scripts/README.md | 2 + scripts/check_golf.py | 93 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 77 insertions(+), 18 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index 8060c158d..caa67f8b8 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -51,3 +51,5 @@ This linter may need running a number of times. It parses the changed Lean files textually (no build required): comments and whitespace are ignored, and `by` proof terms embedded inside a type are treated as proofs (proof-irrelevant). Anonymous instances and `example`s are not tracked. + It also breaks the golfs down by trivial shape: proofs where only a newline was + removed, and proofs where tactics were joined onto one line with a `;`. diff --git a/scripts/check_golf.py b/scripts/check_golf.py index d85076676..0ae7d39d8 100755 --- a/scripts/check_golf.py +++ b/scripts/check_golf.py @@ -5,7 +5,10 @@ definitions) without touching their *statements* (the signature: the name, the binders and the type). This script compares two git revisions of the repository and, for every declaration in the changed Lean files, decides -whether its statement was preserved and only the proof/body changed. +whether its statement was preserved and only the proof/body changed. It also +counts the trivial reshapes among the golfs: proofs where only a newline was +removed (same tokens, fewer lines), and proofs where tactics were joined onto +one line with a ``;`` (excluding the ``<;>`` combinator). It is meant to be driven from a GitHub workflow that reacts to a ``/check-golf`` comment on a pull request: it produces a Markdown report and, @@ -169,6 +172,20 @@ def normalize(fragment: str) -> str: return " ".join(fragment.split()) +# A standalone `;` tactic separator, i.e. not part of the `<;>` combinator. +_SEMI_RE = re.compile(r"(?)") + + +def code_lines(fragment: str) -> List[str]: + """Non-blank lines of a (comment-stripped) fragment, each stripped.""" + return [line.strip() for line in fragment.split("\n") if line.strip()] + + +def semi_count(fragment: str) -> int: + """Number of standalone `;` tactic separators (ignoring `<;>`).""" + return len(_SEMI_RE.findall(fragment)) + + def _ident_char(c: str) -> bool: return c.isalnum() or c in "_.'" or ord(c) > 127 @@ -223,6 +240,7 @@ class Decl: signature: str # normalized statement (keyword .. up to `:=`/`where`) signature_masked: str # same, with embedded `by` proof blocks blanked body: str # normalized defining term / proof + body_raw: str # defining term / proof, comments stripped, layout kept @property def is_proof(self) -> bool: @@ -382,7 +400,8 @@ def parse_decls(text: str) -> "Dict[str, Decl]": signature = normalize(sig_code) signature_masked = mask_by_blocks(sig_code) body_start = boundary + (2 if code[boundary:boundary + 2] == ":=" else 0) - body = normalize(code[body_start:end]) + body_raw = code[body_start:end] + body = normalize(body_raw) prefix = ".".join(nm for kind, nm in ns_stack if kind == "namespace" and nm) @@ -393,7 +412,7 @@ def parse_decls(text: str) -> "Dict[str, Decl]": else: seen[qualified] = 0 result[qualified] = Decl(qualified, keyword, signature, - signature_masked, body) + signature_masked, body, body_raw) return result @@ -409,12 +428,15 @@ class FileReport: def_body_changed: List[str] = field(default_factory=list) added: List[str] = field(default_factory=list) removed: List[str] = field(default_factory=list) + # Trivial golf shapes (subsets / refinements of the above). + newline_removed: List[str] = field(default_factory=list) + semicolon_crammed: List[Tuple[str, int]] = field(default_factory=list) @property def touched(self) -> bool: return bool(self.statement_changed or self.proof_golfed or self.embedded_proof_changed or self.def_body_changed - or self.added or self.removed) + or self.added or self.removed or self.newline_removed) def compare_file(path: str, base_src: Optional[str], @@ -427,15 +449,25 @@ def compare_file(path: str, base_src: Optional[str], rep.added.append(name) continue b = base[name] - if b.signature == d.signature: - if b.body != d.body: - (rep.proof_golfed if d.is_proof - else rep.def_body_changed).append(name) - elif b.signature_masked == d.signature_masked: - # Only embedded proof terms differ: the type is unchanged. - rep.embedded_proof_changed.append(name) - else: - rep.statement_changed.append(name) + if b.signature != d.signature: + if b.signature_masked == d.signature_masked: + # Only embedded proof terms differ: the type is unchanged. + rep.embedded_proof_changed.append(name) + else: + rep.statement_changed.append(name) + continue + # Statement preserved; classify how the proof/body changed. + if b.body != d.body: + # A real token-level change to the proof / defining term. + (rep.proof_golfed if d.is_proof + else rep.def_body_changed).append(name) + crammed = semi_count(d.body_raw) - semi_count(b.body_raw) + if crammed > 0: + rep.semicolon_crammed.append((name, crammed)) + elif len(code_lines(b.body_raw)) > len(code_lines(d.body_raw)): + # Same tokens, fewer lines: the proof was only reflowed (a newline + # was deleted) without joining tactics with `;`. + rep.newline_removed.append(name) for name in base: if name not in head: rep.removed.append(name) @@ -478,6 +510,9 @@ def render_report(base: str, head: str, reports: List[FileReport]) -> str: defbody = [(r.path, n) for r in reports for n in r.def_body_changed] added = [(r.path, n) for r in reports for n in r.added] removed = [(r.path, n) for r in reports for n in r.removed] + newline = [(r.path, n) for r in reports for n in r.newline_removed] + crammed = [(r.path, n, c) for r in reports for n, c in r.semicolon_crammed] + cram_total = sum(c for _, _, c in crammed) n_files = sum(1 for r in reports if r.touched) lines: List[str] = [COMMENT_MARKER, "## 🏌️ check-golf report", ""] @@ -488,7 +523,7 @@ def render_report(base: str, head: str, reports: List[FileReport]) -> str: lines.append("**Result: ❌ Statements changed.** {} declaration(s) " "changed their statement — this is more than a golf. See " "below.".format(len(stmt))) - elif golfed or embedded or defbody: + elif golfed or embedded or defbody or newline: lines.append("**Result: ✅ Statements preserved.** Every changed " "declaration kept its statement; only proofs / bodies " "changed.") @@ -501,10 +536,19 @@ def render_report(base: str, head: str, reports: List[FileReport]) -> str: lines.append("| Proofs golfed (statement unchanged) | {} |".format(len(golfed))) lines.append("| Statements changed | {} |".format(len(stmt))) lines.append("| Embedded proofs golfed (type unchanged) | {} |".format(len(embedded))) - lines.append("| Definition bodies changed (type unchanged) | {} |".format(len(defbody))) + lines.append("| Definition values changed (type unchanged) | {} |".format(len(defbody))) lines.append("| Declarations added | {} |".format(len(added))) lines.append("| Declarations removed | {} |".format(len(removed))) lines.append("") + lines.append("Of the golfed proofs/bodies above, the trivial reshapes were:") + lines.append("") + lines.append("| Golf shape | Count |") + lines.append("|---|---:|") + lines.append("| Only a newline removed (proof reflowed, tokens unchanged) | {} |" + .format(len(newline))) + lines.append("| Declarations with tactics joined by `;` | {} |".format(len(crammed))) + lines.append("| — total `;` tactic-joins introduced | {} |".format(cram_total)) + lines.append("") def section(title: str, items: List[Tuple[str, str]], open_: bool) -> None: if not items: @@ -519,17 +563,30 @@ def section(title: str, items: List[Tuple[str, str]], open_: bool) -> None: section("❌ Statements changed", stmt, open_=True) section("✅ Proofs golfed", golfed, open_=False) section("✅ Embedded proofs golfed (type unchanged)", embedded, open_=False) - section("🔧 Definition bodies changed (type unchanged)", defbody, open_=False) + section("🔧 Definition values changed (type unchanged)", defbody, open_=False) + section("↩️ Only a newline removed (tokens unchanged)", newline, open_=False) section("➕ Declarations added", added, open_=False) section("➖ Declarations removed", removed, open_=False) + if crammed: + lines.append("
➡️ Tactics joined by `;` ({})\n" + .format(len(crammed))) + for path, name, count in sorted(crammed): + suffix = "" if count == 1 else " (×{})".format(count) + lines.append("- `{}` — `{}`{}".format(name, path, suffix)) + lines.append("\n
") + lines.append("") + lines.append( "Generated by scripts/check_golf.py · triggered by " "/check-golf. Statements are compared textually: comments " "and whitespace are ignored, and by proof terms embedded " "in a type are treated as proofs (proof-irrelevant). Anonymous " - "instances and examples are not tracked (no stable " - "name).") + "instances and examples are not tracked (no stable name). " + "A “definition value changed” is a def/instance" + "/abbrev whose type is unchanged but whose defining term " + "changed. ; counts exclude the <;> " + "combinator.") return "\n".join(lines) From a091941daa5d4879e5ef8ca3b0a178f25fe4e6b8 Mon Sep 17 00:00:00 2001 From: Vasily Ilin Date: Fri, 3 Jul 2026 00:38:14 -0700 Subject: [PATCH 3/5] feat: split definition changes into proof-only vs value changed A `def`/`instance`/`abbrev` often carries proof obligations discharged by `by` blocks. Golfing only those does not change the definition's data, so it should not read as "the definition changed". The report now masks a definition's `by` proof blocks and splits its changes into: - "definition proofs golfed" (data unchanged), and - "definition values changed" (the defining term changed outside proofs). On PR #1332 this moves 76 of the 110 former "definition body" changes into the proof-only bucket, leaving 34 genuine value changes. Co-authored-by: Claude Opus 4.8 Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01Vjiv6nckrp5cJT9rpgZaKT --- scripts/check_golf.py | 81 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 15 deletions(-) diff --git a/scripts/check_golf.py b/scripts/check_golf.py index 0ae7d39d8..329ce9165 100755 --- a/scripts/check_golf.py +++ b/scripts/check_golf.py @@ -5,7 +5,9 @@ definitions) without touching their *statements* (the signature: the name, the binders and the type). This script compares two git revisions of the repository and, for every declaration in the changed Lean files, decides -whether its statement was preserved and only the proof/body changed. It also +whether its statement was preserved and only the proof/body changed. For a +definition it further distinguishes changes confined to its ``by`` proof +blocks (data unchanged) from changes to the defining term itself. It also counts the trivial reshapes among the golfs: proofs where only a newline was removed (same tokens, fewer lines), and proofs where tactics were joined onto one line with a ``;`` (excluding the ``<;>`` combinator). @@ -186,6 +188,42 @@ def semi_count(fragment: str) -> int: return len(_SEMI_RE.findall(fragment)) +_BY_RE = re.compile(r"(? str: + """Collapse indentation-delimited ``by`` tactic blocks to a placeholder. + + In a definition, proof *obligations* are usually discharged by ``by`` + blocks (delimited by indentation), while the data lives outside them. + Masking every ``by`` block therefore leaves the data: if two versions of a + definition agree once their ``by`` blocks are masked, only proofs changed + and the definition's data is unchanged. This is a heuristic -- a proof + written as a term rather than ``by`` is not recognised as a proof. + """ + lines = body.split("\n") + out: List[str] = [] + i, n = 0, len(lines) + while i < n: + line = lines[i] + indent = len(line) - len(line.lstrip()) + m = _BY_RE.search(line) + if m: + out.append(line[:m.start()] + "") + i += 1 + while i < n: + nxt = lines[i] + stripped = nxt.lstrip() + if stripped == "" or len(nxt) - len(stripped) > indent: + i += 1 + else: + break + else: + out.append(line) + i += 1 + return normalize("\n".join(out)) + + def _ident_char(c: str) -> bool: return c.isalnum() or c in "_.'" or ord(c) > 127 @@ -425,7 +463,8 @@ class FileReport: statement_changed: List[str] = field(default_factory=list) proof_golfed: List[str] = field(default_factory=list) embedded_proof_changed: List[str] = field(default_factory=list) - def_body_changed: List[str] = field(default_factory=list) + def_proof_golfed: List[str] = field(default_factory=list) + def_value_changed: List[str] = field(default_factory=list) added: List[str] = field(default_factory=list) removed: List[str] = field(default_factory=list) # Trivial golf shapes (subsets / refinements of the above). @@ -435,8 +474,9 @@ class FileReport: @property def touched(self) -> bool: return bool(self.statement_changed or self.proof_golfed - or self.embedded_proof_changed or self.def_body_changed - or self.added or self.removed or self.newline_removed) + or self.embedded_proof_changed or self.def_proof_golfed + or self.def_value_changed or self.added or self.removed + or self.newline_removed) def compare_file(path: str, base_src: Optional[str], @@ -459,8 +499,14 @@ def compare_file(path: str, base_src: Optional[str], # Statement preserved; classify how the proof/body changed. if b.body != d.body: # A real token-level change to the proof / defining term. - (rep.proof_golfed if d.is_proof - else rep.def_body_changed).append(name) + if d.is_proof: + rep.proof_golfed.append(name) + elif mask_by_indent(b.body_raw) == mask_by_indent(d.body_raw): + # A definition whose data is unchanged once `by` proof blocks + # are masked: only its proof obligations were golfed. + rep.def_proof_golfed.append(name) + else: + rep.def_value_changed.append(name) crammed = semi_count(d.body_raw) - semi_count(b.body_raw) if crammed > 0: rep.semicolon_crammed.append((name, crammed)) @@ -507,7 +553,8 @@ def render_report(base: str, head: str, reports: List[FileReport]) -> str: stmt = [(r.path, n) for r in reports for n in r.statement_changed] golfed = [(r.path, n) for r in reports for n in r.proof_golfed] embedded = [(r.path, n) for r in reports for n in r.embedded_proof_changed] - defbody = [(r.path, n) for r in reports for n in r.def_body_changed] + defproof = [(r.path, n) for r in reports for n in r.def_proof_golfed] + defval = [(r.path, n) for r in reports for n in r.def_value_changed] added = [(r.path, n) for r in reports for n in r.added] removed = [(r.path, n) for r in reports for n in r.removed] newline = [(r.path, n) for r in reports for n in r.newline_removed] @@ -523,7 +570,7 @@ def render_report(base: str, head: str, reports: List[FileReport]) -> str: lines.append("**Result: ❌ Statements changed.** {} declaration(s) " "changed their statement — this is more than a golf. See " "below.".format(len(stmt))) - elif golfed or embedded or defbody or newline: + elif golfed or embedded or defproof or defval or newline: lines.append("**Result: ✅ Statements preserved.** Every changed " "declaration kept its statement; only proofs / bodies " "changed.") @@ -534,9 +581,10 @@ def render_report(base: str, head: str, reports: List[FileReport]) -> str: lines.append("| Category | Count |") lines.append("|---|---:|") lines.append("| Proofs golfed (statement unchanged) | {} |".format(len(golfed))) - lines.append("| Statements changed | {} |".format(len(stmt))) lines.append("| Embedded proofs golfed (type unchanged) | {} |".format(len(embedded))) - lines.append("| Definition values changed (type unchanged) | {} |".format(len(defbody))) + lines.append("| Definition proofs golfed (data unchanged) | {} |".format(len(defproof))) + lines.append("| **Definition values changed** (data changed, type unchanged) | {} |".format(len(defval))) + lines.append("| **Statements changed** | {} |".format(len(stmt))) lines.append("| Declarations added | {} |".format(len(added))) lines.append("| Declarations removed | {} |".format(len(removed))) lines.append("") @@ -561,9 +609,11 @@ def section(title: str, items: List[Tuple[str, str]], open_: bool) -> None: lines.append("") section("❌ Statements changed", stmt, open_=True) + section("🔧 Definition values changed (data changed, type unchanged)", + defval, open_=bool(defval and not stmt)) section("✅ Proofs golfed", golfed, open_=False) section("✅ Embedded proofs golfed (type unchanged)", embedded, open_=False) - section("🔧 Definition values changed (type unchanged)", defbody, open_=False) + section("✅ Definition proofs golfed (data unchanged)", defproof, open_=False) section("↩️ Only a newline removed (tokens unchanged)", newline, open_=False) section("➕ Declarations added", added, open_=False) section("➖ Declarations removed", removed, open_=False) @@ -583,10 +633,11 @@ def section(title: str, items: List[Tuple[str, str]], open_: bool) -> None: "and whitespace are ignored, and by proof terms embedded " "in a type are treated as proofs (proof-irrelevant). Anonymous " "instances and examples are not tracked (no stable name). " - "A “definition value changed” is a def/instance" - "/abbrev whose type is unchanged but whose defining term " - "changed. ; counts exclude the <;> " - "combinator.") + "For a def/instance/abbrev we mask " + "its by proof blocks: if only those changed it is a " + "“definition proofs golfed” (the data is unchanged); if something " + "outside them changed it is a “definition value changed”. ; " + "counts exclude the <;> combinator.") return "\n".join(lines) From 16260fd8172840bce45ad50bd6453ee60b84735d Mon Sep 17 00:00:00 2001 From: Vasily Ilin Date: Fri, 3 Jul 2026 10:16:28 -0700 Subject: [PATCH 4/5] fix: treat whole-body `by` defs as value changes, not proof golfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A `def foo : T := by refine …` builds its *value* with tactics, so a change inside that `by` block can change the definition itself, not just a discharged proof obligation. Masking it as a proof made such edits look like harmless proof golfs. Only classify a changed def body as "proof golfed" when the term outside `by` proof blocks is identical AND the body is not itself a single `by` block (mask != ""). Otherwise report it as a definition-value change. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01Vjiv6nckrp5cJT9rpgZaKT --- scripts/check_golf.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/scripts/check_golf.py b/scripts/check_golf.py index 329ce9165..ef4c4e660 100755 --- a/scripts/check_golf.py +++ b/scripts/check_golf.py @@ -501,12 +501,16 @@ def compare_file(path: str, base_src: Optional[str], # A real token-level change to the proof / defining term. if d.is_proof: rep.proof_golfed.append(name) - elif mask_by_indent(b.body_raw) == mask_by_indent(d.body_raw): - # A definition whose data is unchanged once `by` proof blocks - # are masked: only its proof obligations were golfed. - rep.def_proof_golfed.append(name) else: - rep.def_value_changed.append(name) + mb, mh = mask_by_indent(b.body_raw), mask_by_indent(d.body_raw) + # "Data unchanged" only if the term outside `by` proof blocks is + # identical AND the whole body is not itself one `by` block: a + # `def foo := by ...` builds its *value* with tactics, so any + # change there may change the definition, not just a proof. + if mb == mh and mh != "": + rep.def_proof_golfed.append(name) + else: + rep.def_value_changed.append(name) crammed = semi_count(d.body_raw) - semi_count(b.body_raw) if crammed > 0: rep.semicolon_crammed.append((name, crammed)) From 15ec177fe107a582720190ebc6e0905ef9c5a1df Mon Sep 17 00:00:00 2001 From: Vasily Ilin Date: Fri, 3 Jul 2026 10:21:46 -0700 Subject: [PATCH 5/5] report: soften "definition values changed" to "bodies changed (may have changed)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bucket mixes genuine defining-term changes with defs where only proof parts changed but that can't be proven textually — e.g. a `def := by refine … ?_` whose data is untouched, or a structure-instance whose proof fields were rewritten from tactic to term mode. Asserting "data changed" over-claims those. Relabel to "Definition bodies changed (data *may* have changed, type unchanged)" and add an explanatory note; the high-confidence "Definition proofs golfed (data unchanged)" bucket is unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01Vjiv6nckrp5cJT9rpgZaKT --- scripts/check_golf.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/scripts/check_golf.py b/scripts/check_golf.py index ef4c4e660..fce8eb5b0 100755 --- a/scripts/check_golf.py +++ b/scripts/check_golf.py @@ -7,7 +7,8 @@ repository and, for every declaration in the changed Lean files, decides whether its statement was preserved and only the proof/body changed. For a definition it further distinguishes changes confined to its ``by`` proof -blocks (data unchanged) from changes to the defining term itself. It also +blocks (data unchanged) from changes to the defining term itself (where the +data *may* have changed -- textually we cannot always tell). It also counts the trivial reshapes among the golfs: proofs where only a newline was removed (same tokens, fewer lines), and proofs where tactics were joined onto one line with a ``;`` (excluding the ``<;>`` combinator). @@ -587,7 +588,7 @@ def render_report(base: str, head: str, reports: List[FileReport]) -> str: lines.append("| Proofs golfed (statement unchanged) | {} |".format(len(golfed))) lines.append("| Embedded proofs golfed (type unchanged) | {} |".format(len(embedded))) lines.append("| Definition proofs golfed (data unchanged) | {} |".format(len(defproof))) - lines.append("| **Definition values changed** (data changed, type unchanged) | {} |".format(len(defval))) + lines.append("| **Definition bodies changed** (data *may* have changed, type unchanged) | {} |".format(len(defval))) lines.append("| **Statements changed** | {} |".format(len(stmt))) lines.append("| Declarations added | {} |".format(len(added))) lines.append("| Declarations removed | {} |".format(len(removed))) @@ -602,19 +603,25 @@ def render_report(base: str, head: str, reports: List[FileReport]) -> str: lines.append("| — total `;` tactic-joins introduced | {} |".format(cram_total)) lines.append("") - def section(title: str, items: List[Tuple[str, str]], open_: bool) -> None: + def section(title: str, items: List[Tuple[str, str]], open_: bool, + note: str = "") -> None: if not items: return lines.append("{} ({})\n".format( " open" if open_ else "", title, len(items))) + if note: + lines.append(note + "\n") for path, name in sorted(items): lines.append("- `{}` — `{}`".format(name, path)) lines.append("\n") lines.append("") section("❌ Statements changed", stmt, open_=True) - section("🔧 Definition values changed (data changed, type unchanged)", - defval, open_=bool(defval and not stmt)) + section("🔧 Definition bodies changed (data may have changed, type unchanged)", + defval, open_=bool(defval and not stmt), + note=("_The defining term outside `by` proof blocks changed, or the " + "whole body is one `by` block. The data may or may not have " + "actually changed — flagged for a human glance._")) section("✅ Proofs golfed", golfed, open_=False) section("✅ Embedded proofs golfed (type unchanged)", embedded, open_=False) section("✅ Definition proofs golfed (data unchanged)", defproof, open_=False) @@ -640,7 +647,9 @@ def section(title: str, items: List[Tuple[str, str]], open_: bool) -> None: "For a def/instance/abbrev we mask " "its by proof blocks: if only those changed it is a " "“definition proofs golfed” (the data is unchanged); if something " - "outside them changed it is a “definition value changed”. ; " + "outside them changed — or the whole body is one by block " + "— it is a “definition body changed” (the data may have changed; " + "textually we cannot always tell). ; " "counts exclude the <;> combinator.") return "\n".join(lines)