diff --git a/bin/delete-version b/bin/delete-version new file mode 100755 index 00000000..34a0d7e9 --- /dev/null +++ b/bin/delete-version @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import difflib +import json +import re +import shlex +import subprocess +import sys +from pathlib import Path + +BUCKET = "gs://pypi.devinfra.sentry.io" +CACHE_NO = ("--cache-control", "no-store") +CACHE_FIVE_MINUTES = ("--cache-control", "public, max-age=300") +CONTENT_TYPE_HTML = ("--content-type", "text/html; charset=utf-8") + + +def normalize(name: str) -> str: + # PEP 427: wheel distribution name uses underscores + return re.sub(r"[-_.]+", "_", name).lower() + + +def wheel_matches(filename: str, norm_name: str, version: str) -> bool: + return filename.startswith(f"{norm_name}-{version}-") and filename.endswith(".whl") + + +def run(cmd: tuple[str, ...], dry_run: bool) -> None: + if dry_run: + print(f" [dry-run] {shlex.join(cmd)}") + else: + print(f" {shlex.join(cmd)}") + subprocess.check_call(cmd) + + +def fetch(src: str, dest: str) -> None: + subprocess.check_call(("gcloud", "storage", "cp", src, dest)) + + +_RED = "\033[31m" +_GREEN = "\033[32m" +_CYAN = "\033[36m" +_RESET = "\033[0m" + + +def print_diff(orig: Path, new: Path) -> None: + orig_lines = orig.read_text().splitlines(keepends=True) + new_lines = new.read_text().splitlines(keepends=True) + for line in difflib.unified_diff( + orig_lines, new_lines, fromfile=str(orig), tofile=str(new) + ): + if line.startswith("+"): + sys.stdout.write(f"{_GREEN}{line}{_RESET}") + elif line.startswith("-"): + sys.stdout.write(f"{_RED}{line}{_RESET}") + elif line.startswith("@@"): + sys.stdout.write(f"{_CYAN}{line}{_RESET}") + else: + sys.stdout.write(line) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Remove a specific version of a package from the PyPI mirror.", + ) + parser.add_argument("package", help="Package name (e.g. 
sentry-sdk)") + parser.add_argument("version", help="Version to remove (e.g. 2.58.0a1)") + parser.add_argument( + "--execute", + action="store_true", + help="Actually perform the deletions (default: dry run)", + ) + args = parser.parse_args(argv) + + dry_run = not args.execute + norm_name = normalize(args.package) + # dumb-pypi uses hyphens for the simple/ directory name + pkg_slug = norm_name.replace("_", "-") + index_url = f"{BUCKET}/simple/{pkg_slug}/index.html" + + if dry_run: + print("DRY RUN — pass --execute to make changes\n") + + print(f"Package : {args.package} (normalized: {norm_name})") + print(f"Version : {args.version}") + print() + + new_packages_json_path = Path("packages.json.new") + new_index_path = Path("index.html.new") + + try: + # --- packages.json --- + packages_json_path = Path("packages.json.orig") + print(f"Fetching {BUCKET}/packages.json ...") + fetch(f"{BUCKET}/packages.json", str(packages_json_path)) + + packages = [ + json.loads(line) + for line in packages_json_path.read_text().splitlines() + if line.strip() + ] + to_remove = [ + p for p in packages if wheel_matches(p["filename"], norm_name, args.version) + ] + + if not to_remove: + print( + f"No wheels found matching {norm_name}=={args.version} in packages.json", + file=sys.stderr, + ) + return 1 + + print(f"Wheels to remove ({len(to_remove)}):") + for p in to_remove: + print(f" {p['filename']}") + print() + + remove_filenames = {p["filename"] for p in to_remove} + remaining = [p for p in packages if p["filename"] not in remove_filenames] + + new_packages_json_path.write_text( + "\n".join(json.dumps(p) for p in remaining) + "\n" + ) + + # --- index.html --- + index_orig_path = Path("index.html.orig") + print(f"Fetching {index_url} ...") + fetch(index_url, str(index_orig_path)) + + def _remove_li(match: re.Match[str]) -> str: + return ( + "" + if any(fn in match.group() for fn in remove_filenames) + else match.group() + ) + + new_index_text = re.sub( + r"[ \t]*
  • .*?
  • [ \t]*\n", + _remove_li, + index_orig_path.read_text(), + flags=re.DOTALL, + ) + new_index_path.write_text(new_index_text) + + print_diff(packages_json_path, new_packages_json_path) + print_diff(index_orig_path, new_index_path) + print() + + # --- GCS writes --- + print("Operations:") + run( + ( + "gcloud", + "storage", + "cp", + *CACHE_NO, + str(new_packages_json_path), + f"{BUCKET}/packages.json", + ), + dry_run, + ) + run( + ( + "gcloud", + "storage", + "cp", + *CACHE_FIVE_MINUTES, + *CONTENT_TYPE_HTML, + str(new_index_path), + index_url, + ), + dry_run, + ) + for filename in sorted(remove_filenames): + run( + ("gcloud", "storage", "rm", f"{BUCKET}/wheels/{filename}"), + dry_run, + ) + # also remove the .metadata sidecar uploaded alongside each wheel + run( + ("gcloud", "storage", "rm", f"{BUCKET}/wheels/{filename}.metadata"), + dry_run, + ) + + finally: + if args.execute: + for f in ( + packages_json_path, + index_orig_path, + new_packages_json_path, + new_index_path, + ): + f.unlink(missing_ok=True) + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/packages.ini b/packages.ini index aad53b92..b2db8dd8 100644 --- a/packages.ini +++ b/packages.ini @@ -3501,7 +3501,6 @@ python_versions = <3.12 [taskbroker-client==0.1.8] [taskbroker-client==0.1.9] [taskbroker-client==0.1.10] -[taskbroker-client==26.5.0] [tiktoken==0.6.0] python_versions = <3.13 diff --git a/runbook/deleting.md b/runbook/deleting.md index 1e6f87a6..5429865b 100644 --- a/runbook/deleting.md +++ b/runbook/deleting.md @@ -7,41 +7,39 @@ cases. usually a deletion should only be done if there's a security concern. 1. lock merges during the process (this can be done with branch protection) -1. remove the package from the `packages.ini` metadata (prevent reintroduction) -1. edit the `packages.json` to remove the deleted files: +2. remove the package from the `packages.ini` metadata (prevent reintroduction) +3a. 
(if removing a version of a package) use the automated script: ```bash - gsutil cp gs://pypi.devinfra.sentry.io/packages.json . - cp packages.json{,.bak} - "${EDITOR:-vim}" packages.json - git diff --no-index packages.json{.bak,} - gsutil -h 'Cache-Control: no store' cp packages.json gs://pypi.devinfra.sentry.io - ``` - -1. (if removing the whole package) delete the index page for the package: + # preview what will change (dry run is the default) + ./bin/delete-version PACKAGE VERSION - ```bash - gsutil ls gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin - gsutil rm -r gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin + # apply + ./bin/delete-version PACKAGE VERSION --execute ``` -1. (if removing a version of a package) edit the index page for the package: + this updates `packages.json`, edits the package's `simple/` index page, and + deletes the specific wheels and their `.metadata` sidecars. + +3b. (if removing the whole package) perform the steps manually: ```bash - gsutil cp gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin/index.html . - cp index.html{,.bak} - "${EDITOR:-vim}" index.html - gsutil -h 'Cache-Control: public, max-age=300' cp index.html gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin/index.html - ``` + # remove from packages.json + gsutil cp gs://pypi.devinfra.sentry.io/packages.json . + cp packages.json{,.bak} + "${EDITOR:-vim}" packages.json + git diff --no-index packages.json{.bak,} + gsutil -h 'Cache-Control: no-store' cp packages.json gs://pypi.devinfra.sentry.io -1. delete the wheels for the package: + # delete the index page + gsutil rm -r gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin - ```bash + # delete all wheels for the package gsutil ls gs://pypi.devinfra.sentry.io/wheels/uwsgi_dogstatsd_plugin* gsutil rm gs://pypi.devinfra.sentry.io/wheels/uwsgi_dogstatsd_plugin* ``` -1. unlock merges +4. 
unlock merges _note that the index page may still link to the deleted package_ -- this is harmless and will be cleaned up on the next execution