Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 199 additions & 0 deletions bin/delete-version
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import difflib
import json
import re
import shlex
import subprocess
import sys
from pathlib import Path

# Root of the GCS bucket; packages.json, simple/<package>/index.html and
# wheels/<filename> are all addressed relative to it.
BUCKET = "gs://pypi.devinfra.sentry.io"
# Extra `gcloud storage cp` arguments, splatted into the upload commands:
# packages.json is uploaded with CACHE_NO, the package index page with
# CACHE_FIVE_MINUTES and CONTENT_TYPE_HTML.
CACHE_NO = ("--cache-control", "no-store")
CACHE_FIVE_MINUTES = ("--cache-control", "public, max-age=300")
CONTENT_TYPE_HTML = ("--content-type", "text/html; charset=utf-8")


def normalize(name: str) -> str:
    """Return *name* in the form used for the name part of a wheel filename.

    Every run of ``-``, ``_`` and ``.`` is collapsed to a single underscore
    and the result is lowercased, e.g. ``"Sentry-SDK"`` -> ``"sentry_sdk"``.
    """
    # PEP 427: wheel distribution name uses underscores
    return re.sub(r"[-_.]+", "_", name).lower()


def wheel_matches(filename: str, norm_name: str, version: str) -> bool:
    """Return True if *filename* is a wheel of *norm_name* at exactly *version*.

    A wheel filename has the shape ``{name}-{version}-{tags...}.whl`` and the
    name component never contains ``-``.  The name component is normalized
    before it is compared, so wheels that kept their original spelling
    (``PyYAML-6.0-...``, ``zope.interface-5.4-...``) are still recognised.

    Args:
        filename: Bare wheel filename (no directory part).
        norm_name: Package name, already normalized (lowercase, underscores).
        version: Exact version string as it appears in the filename.
    """
    if not filename.endswith(".whl"):
        return False
    dist, sep, rest = filename.partition("-")
    if not sep:
        return False
    # Same normalization the script applies to the package name given on the
    # command line; inlined so this predicate does not depend on its caller.
    dist_norm = re.sub(r"[-_.]+", "_", dist).lower()
    # The trailing "-" stops "1.0" from matching "1.0.1".
    return dist_norm == norm_name and rest.startswith(f"{version}-")


def run(cmd: tuple[str, ...], dry_run: bool) -> None:
    """Echo *cmd* and, unless *dry_run* is set, execute it.

    Args:
        cmd: Command and arguments, passed to the OS without a shell.
        dry_run: When true the command is only printed (tagged ``[dry-run]``).

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    prefix = " [dry-run] " if dry_run else " "
    # Flush so the echoed command is emitted before the child process writes
    # its own output when stdout is block-buffered (piped or redirected).
    print(f"{prefix}{shlex.join(cmd)}", flush=True)
    if not dry_run:
        subprocess.check_call(cmd)


def fetch(src: str, dest: str) -> None:
    """Copy *src* to *dest* with ``gcloud storage cp``.

    Raises:
        subprocess.CalledProcessError: if ``gcloud`` exits non-zero.
    """
    subprocess.check_call(("gcloud", "storage", "cp", src, dest))


# ANSI escape sequences used to colour diff output.
_RED = "\033[31m"
_GREEN = "\033[32m"
_CYAN = "\033[36m"
_RESET = "\033[0m"

# Diff-line prefix -> colour, checked in order.  The "---" / "+++" file
# headers share the "-" / "+" prefixes and are coloured the same way.
_DIFF_COLORS = (("+", _GREEN), ("-", _RED), ("@@", _CYAN))


def print_diff(orig: Path, new: Path) -> None:
    """Write a coloured unified diff of two text files to stdout.

    Added lines are green, removed lines red and hunk headers cyan; context
    lines are written unchanged.  Nothing is written when the files are equal.

    Args:
        orig: File holding the original content.
        new: File holding the edited content.
    """
    # Explicit encoding: the result must not depend on the operator's locale.
    orig_lines = orig.read_text(encoding="utf-8").splitlines(keepends=True)
    new_lines = new.read_text(encoding="utf-8").splitlines(keepends=True)
    for line in difflib.unified_diff(
        orig_lines, new_lines, fromfile=str(orig), tofile=str(new)
    ):
        # A file without a final newline yields a diff line without one;
        # terminate it so the following line does not run into it.
        if not line.endswith("\n"):
            line += "\n"
        color = next(
            (code for prefix, code in _DIFF_COLORS if line.startswith(prefix)),
            None,
        )
        if color is None:
            sys.stdout.write(line)
        else:
            sys.stdout.write(f"{color}{line}{_RESET}")


def main(argv: list[str] | None = None) -> int:
    """Remove one version of a package from the PyPI mirror bucket.

    Downloads ``packages.json`` and the package's ``simple/`` index page,
    writes edited copies with every wheel of the requested version removed,
    prints a diff of both, then uploads the edited files and deletes the
    wheels and their ``.metadata`` sidecars.  Without ``--execute`` the
    downloads and diffs still happen, but the upload / delete commands are
    only printed.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 on success, 1 when no wheel in ``packages.json`` matches.

    Raises:
        subprocess.CalledProcessError: if a ``gcloud`` command fails.
    """
    parser = argparse.ArgumentParser(
        description="Remove a specific version of a package from the PyPI mirror.",
    )
    parser.add_argument("package", help="Package name (e.g. sentry-sdk)")
    parser.add_argument("version", help="Version to remove (e.g. 2.58.0a1)")
    parser.add_argument(
        "--execute",
        action="store_true",
        help="Actually perform the deletions (default: dry run)",
    )
    args = parser.parse_args(argv)

    dry_run = not args.execute
    norm_name = normalize(args.package)
    # dumb-pypi uses hyphens for the simple/ directory name
    pkg_slug = norm_name.replace("_", "-")
    index_url = f"{BUCKET}/simple/{pkg_slug}/index.html"

    if dry_run:
        print("DRY RUN — pass --execute to make changes\n")

    print(f"Package : {args.package} (normalized: {norm_name})")
    print(f"Version : {args.version}")
    print()

    # All working files are named before the try block so the cleanup in
    # ``finally`` can reference every one of them, even after the early
    # ``return 1`` or a failed download.
    packages_json_path = Path("packages.json.orig")
    index_orig_path = Path("index.html.orig")
    new_packages_json_path = Path("packages.json.new")
    new_index_path = Path("index.html.new")

    try:
        # --- packages.json ---
        print(f"Fetching {BUCKET}/packages.json ...")
        fetch(f"{BUCKET}/packages.json", str(packages_json_path))

        # packages.json holds one JSON object per line.
        packages = [
            json.loads(line)
            for line in packages_json_path.read_text(encoding="utf-8").splitlines()
            if line.strip()
        ]
        to_remove = [
            p for p in packages if wheel_matches(p["filename"], norm_name, args.version)
        ]

        if not to_remove:
            print(
                f"No wheels found matching {norm_name}=={args.version} in packages.json",
                file=sys.stderr,
            )
            return 1

        print(f"Wheels to remove ({len(to_remove)}):")
        for p in to_remove:
            print(f" {p['filename']}")
        print()

        remove_filenames = {p["filename"] for p in to_remove}
        remaining = [p for p in packages if p["filename"] not in remove_filenames]

        new_packages_json_path.write_text(
            "\n".join(json.dumps(p) for p in remaining) + "\n",
            encoding="utf-8",
        )

        # --- index.html ---
        print(f"Fetching {index_url} ...")
        fetch(index_url, str(index_orig_path))

        def _remove_li(match: re.Match[str]) -> str:
            # Drop the whole <li> element when it mentions a removed wheel.
            return (
                ""
                if any(fn in match.group() for fn in remove_filenames)
                else match.group()
            )

        # DOTALL lets one <li>...</li> element span several lines.
        new_index_text = re.sub(
            r"[ \t]*<li>.*?</li>[ \t]*\n",
            _remove_li,
            index_orig_path.read_text(encoding="utf-8"),
            flags=re.DOTALL,
        )
        new_index_path.write_text(new_index_text, encoding="utf-8")

        print_diff(packages_json_path, new_packages_json_path)
        print_diff(index_orig_path, new_index_path)
        print()

        # --- GCS writes ---
        print("Operations:")
        run(
            (
                "gcloud",
                "storage",
                "cp",
                *CACHE_NO,
                str(new_packages_json_path),
                f"{BUCKET}/packages.json",
            ),
            dry_run,
        )
        run(
            (
                "gcloud",
                "storage",
                "cp",
                *CACHE_FIVE_MINUTES,
                *CONTENT_TYPE_HTML,
                str(new_index_path),
                index_url,
            ),
            dry_run,
        )
        for filename in sorted(remove_filenames):
            run(
                ("gcloud", "storage", "rm", f"{BUCKET}/wheels/{filename}"),
                dry_run,
            )
            # also remove the .metadata sidecar uploaded alongside each wheel
            run(
                ("gcloud", "storage", "rm", f"{BUCKET}/wheels/{filename}.metadata"),
                dry_run,
            )

    finally:
        # A dry run leaves the working files in the current directory; they
        # are removed only when --execute was given.
        if args.execute:
            for f in (
                packages_json_path,
                index_orig_path,
                new_packages_json_path,
                new_index_path,
            ):
                f.unlink(missing_ok=True)

    return 0
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

finally block crashes on undefined index_orig_path

High Severity

When --execute is passed and no matching wheels are found, return 1 at line 111 triggers the finally block. At that point, index_orig_path (assigned at line 126) has never been defined, causing an UnboundLocalError. The same crash occurs if any exception is raised between lines 93–125. This masks the original error/exit and leaves temp files behind instead of cleaning up.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 33de2cf. Configure here.

Comment on lines +185 to +193
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The script can crash with a NameError in the finally block if it exits early with --execute, because index_orig_path may not be defined before the cleanup code is run.
Severity: CRITICAL

Suggested Fix

Initialize index_orig_path to None before the try block. In the finally block, check if index_orig_path is not None and exists before attempting to access or unlink it. This ensures the variable always exists and prevents the NameError in realistic failure scenarios.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: bin/delete-version#L185-L193

Potential issue: The `finally` block in `bin/delete-version` attempts to clean up
temporary files. However, it references the `index_orig_path` variable, which is only
defined on line 126 within the `try` block. If the script exits early (e.g., at line 111
when no matching wheels are found) or if an exception occurs before line 126, the
`finally` block is still executed. When this happens with the `--execute` flag, the code
attempts to access the undefined `index_orig_path`, causing a `NameError` and crashing
the script. This leaves temporary files like `packages.json.orig` and `*.new` on disk.

Did we get this right? 👍 / 👎 to inform future reviews.


return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
1 change: 0 additions & 1 deletion packages.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3501,7 +3501,6 @@ python_versions = <3.12
[taskbroker-client==0.1.8]
[taskbroker-client==0.1.9]
[taskbroker-client==0.1.10]
[taskbroker-client==26.5.0]

[tiktoken==0.6.0]
python_versions = <3.13
Expand Down
42 changes: 20 additions & 22 deletions runbook/deleting.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,39 @@ cases.
usually a deletion should only be done if there's a security concern.

1. lock merges during the process (this can be done with branch protection)
1. remove the package from the `packages.ini` metadata (prevent reintroduction)
1. edit the `packages.json` to remove the deleted files:
2. remove the package from the `packages.ini` metadata (prevent reintroduction)
3a. (if removing a version of a package) use the automated script:

```bash
gsutil cp gs://pypi.devinfra.sentry.io/packages.json .
cp packages.json{,.bak}
"${EDITOR:-vim}" packages.json
git diff --no-index packages.json{.bak,}
gsutil -h 'Cache-Control: no-store' cp packages.json gs://pypi.devinfra.sentry.io
```

1. (if removing the whole package) delete the index page for the package:
# preview what will change (dry run is the default)
./bin/delete-version <package> <version>

```bash
gsutil ls gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin
gsutil rm -r gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin
# apply
./bin/delete-version <package> <version> --execute
```

1. (if removing a version of a package) edit the index page for the package:
this updates `packages.json`, edits the package's `simple/` index page, and
deletes the specific wheels and their `.metadata` sidecars.

3b. (if removing the whole package) perform the steps manually:

```bash
gsutil cp gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin/index.html .
cp index.html{,.bak}
"${EDITOR:-vim}" index.html
gsutil -h 'Cache-Control: public, max-age=300' cp index.html gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin/index.html
```
# remove from packages.json
gsutil cp gs://pypi.devinfra.sentry.io/packages.json .
cp packages.json{,.bak}
"${EDITOR:-vim}" packages.json
git diff --no-index packages.json{.bak,}
gsutil -h 'Cache-Control: no-store' cp packages.json gs://pypi.devinfra.sentry.io

1. delete the wheels for the package:
# delete the index page
gsutil rm -r gs://pypi.devinfra.sentry.io/simple/uwsgi-dogstatsd-plugin

```bash
# delete all wheels for the package
gsutil ls gs://pypi.devinfra.sentry.io/wheels/uwsgi_dogstatsd_plugin*
gsutil rm gs://pypi.devinfra.sentry.io/wheels/uwsgi_dogstatsd_plugin*
```

1. unlock merges
4. unlock merges

_note that the index page may still link to the deleted package_ -- this is
harmless and will be cleaned up on the next execution
Loading