Skip to content
Draft
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 72 additions & 13 deletions scripts/create_singularities
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,17 @@ class Builder:
return subprocess.run(args, **kwargs)

@staticmethod
def get_last_docker_version_tag(dh: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]:
r = retry_get(f"https://registry.hub.docker.com/v2/repositories/{dh}/tags")
versions = [cast(str, res["name"]) for res in r.json()["results"]]
if version_regex:
versions = [v for v in versions if re.search(version_regex, v)]
def _select_best_version(versions: list[str], image: str, only_good_versions: bool=False) -> Optional[tuple[str, str]]:
"""Select the best version tag from a list of tags.

Args:
versions: List of version tag strings (already filtered by version_regex if needed)
image: Image identifier used only for logging
only_good_versions: If True, require a semver-style version even when only one tag exists

Returns:
A tuple (version_pure, version_tag) for the best version, or None if none qualify.
"""
if len(versions) > 1 or (versions and only_good_versions):
# select only the ones which seems to be semantic and/or
# master/latest. Some release alpha releases, so probably would
Expand All @@ -113,11 +119,49 @@ class Builder:
else:
return None
elif not versions:
log.info(" %s no version. Tags: %s", dh, " ".join(versions))
log.info(" %s no version. Tags: %s", image, " ".join(versions))
return None
else:
return (versions[0], versions[0])

@staticmethod
def get_last_docker_version_tag(dh: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]:
    """Pick the best version tag of a Docker Hub repository.

    Issues one request to the Docker Hub tags endpoint for ``dh`` and
    considers only the tag names found under ``"results"`` in that single
    response.

    Args:
        dh: Docker Hub repository identifier, interpolated into the URL.
        only_good_versions: Forwarded to ``Builder._select_best_version``.
        version_regex: If set, keep only tags for which ``re.search`` with
            this pattern finds a match.

    Returns:
        Whatever ``Builder._select_best_version`` returns for the remaining
        tags: a ``(version_pure, version_tag)`` tuple or ``None``.
    """
    response = retry_get(f"https://registry.hub.docker.com/v2/repositories/{dh}/tags")
    tags = [cast(str, entry["name"]) for entry in response.json()["results"]]

    if version_regex:
        # Search (not full-match) semantics: the pattern may hit anywhere in the tag.
        matches = re.compile(version_regex).search
        tags = [tag for tag in tags if matches(tag)]
    return Builder._select_best_version(tags, dh, only_good_versions)

@staticmethod
def get_last_ghcr_version_tag(image: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]:
    """Pick the best version tag of a GitHub Container Registry image.

    Args:
        image: Image path without the registry prefix (the part after
            "ghcr.io/"), e.g. "unfmontreal/skullduggery".
        only_good_versions: Forwarded to ``Builder._select_best_version``.
        version_regex: If set, keep only tags for which ``re.search`` with
            this pattern finds a match.

    Returns:
        Whatever ``Builder._select_best_version`` returns for the remaining
        tags: a ``(version_pure, version_tag)`` tuple or ``None``.
    """
    # Ask the registry for an anonymous, pull-scoped Bearer token
    # (intended for public GHCR images).
    auth_payload = retry_get(
        f"https://ghcr.io/token?scope=repository:{image}:pull&service=ghcr.io"
    ).json()
    # Prefer the "token" field; fall back to "access_token", then to "".
    bearer = auth_payload.get("token")
    if not bearer:
        bearer = auth_payload.get("access_token", "")

    listing = retry_get(
        f"https://ghcr.io/v2/{image}/tags/list",
        headers={"Authorization": f"Bearer {bearer}"},
    )
    # A missing or null "tags" entry is treated as "no tags".
    tags: list[str] = listing.json().get("tags") or []

    if version_regex:
        matches = re.compile(version_regex).search
        tags = [tag for tag in tags if matches(tag)]
    return Builder._select_best_version(tags, image, only_good_versions)

@staticmethod
def get_docker_repositories(namespace: str, full:bool = True) -> Generator[str, None, None]:
"""Return repositories for a specific namespace (user or organization)
Expand Down Expand Up @@ -340,19 +384,31 @@ class Builder:
familysuf: Optional[str]=None,
) \
-> None:
is_ghcr = githubid.startswith("ghcr.io/")
dockerhubid = githubid.lower()
if not family:
family = dockerhubid.split('/', 1)[0]
if is_ghcr:
# For ghcr.io/owner/image, use owner as the family
parts = dockerhubid.split('/')
family = parts[1] if len(parts) > 1 else dockerhubid
else:
family = dockerhubid.split('/', 1)[0]
if self.githubids and githubid not in self.githubids:
log.info("skip %s", githubid)
return
#if dockerhubid in ("djarecka/fmriprep_fake", "pennlinc/toy_bids_app", "nipreps/fmripost-aroma", "nipreps/fmripost-phase", "nipreps/fmripost-rapidtide"):
# log.info("TEMP TODO skip %s due to odd OCI issue ATM with old singularity", dockerhubid)
# return
log.info("%s <- docker %s", family, dockerhubid)
last_version = self.get_last_docker_version_tag(
dockerhubid, only_good_versions=only_good_versions,
version_regex=version_regex)
if is_ghcr:
ghcr_image = dockerhubid[len("ghcr.io/"):]
last_version = self.get_last_ghcr_version_tag(
ghcr_image, only_good_versions=only_good_versions,
version_regex=version_regex)
else:
last_version = self.get_last_docker_version_tag(
dockerhubid, only_good_versions=only_good_versions,
version_regex=version_regex)
if last_version is not None:
last_version_pure, last_version_tag = last_version
# print(dockerhubid, last_version)
Expand Down Expand Up @@ -460,7 +516,7 @@ def version_key(vstr: str) -> tuple[int, tuple[int, ...], str]:

# Use familysuf to add a suffix such as "LTS" or similar
def get_familyname(repoid: str, family: str, familysuf: Optional[str]=None) -> str:
name = re.sub(r"^[^/]*/", "", repoid)
name = re.sub(r".*/", "", repoid) # take only the last path segment (handles ghcr.io/owner/image)
# sanitize for datalad not allowing _ in the container names
name = name.replace("_", "-")
familyname = f"{family}-{name}"
Expand Down Expand Up @@ -562,6 +618,9 @@ def main(push: bool, image_groups: tuple[str, ...], no_singularity_check: bool,
# Docker image for simple_workflow
builder.generate_singularity_for_docker_image("ReproNim/simple_workflow")

if should_build('repronim', 'ghcr.io/unfmontreal/skullduggery'):
builder.generate_singularity_for_docker_image("ghcr.io/unfmontreal/skullduggery", "repronim")

# neuronets
if should_build('neuronets', 'neuronets/kwyk'):
builder.generate_singularity_for_docker_image("neuronets/kwyk")
Expand All @@ -588,11 +647,11 @@ def main(push: bool, image_groups: tuple[str, ...], no_singularity_check: bool,
builder.runcmd("datalad", "push", "--data=auto") # so we share with the world


def retry_get(url: str) -> requests.Response:
def retry_get(url: str, headers: Optional[dict[str, str]] = None) -> requests.Response:
sleepiter = exp_wait(attempts=10)
while True:
try:
r = requests.get(url)
r = requests.get(url, headers=headers)
r.raise_for_status()
return r
except (requests.ConnectionError, requests.HTTPError, requests.Timeout) as e:
Expand Down