From 422e323fb405971a28b2761539eeea0ecdc1867c Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Tue, 26 May 2026 19:59:10 +0000
Subject: [PATCH 01/11] saturn-python-llm: declare trl, peft, datasets
 explicitly

These are pulled in transitively by unsloth, but declaring them
explicitly makes the image's training API surface stable against
unsloth version bumps. The Token Factory fine-tune training script
(in a separate repo) imports trl.SFTTrainer and peft directly, and
needs to be able to rely on those being present and version-compatible
with the rest of the image's HF stack.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 saturn-python-llm/environment.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/saturn-python-llm/environment.yml b/saturn-python-llm/environment.yml
index 6c1acda..92afeca 100644
--- a/saturn-python-llm/environment.yml
+++ b/saturn-python-llm/environment.yml
@@ -32,7 +32,14 @@ dependencies:
   - ipykernel
   - pip
   - pip:
+    # Fine-tuning stack. unsloth pulls trl/peft/datasets transitively, but we
+    # declare them explicitly so the image's training API surface is stable
+    # against unsloth version bumps. The Token Factory fine-tune training
+    # script (separate repo) imports trl.SFTTrainer + peft directly.
     - unsloth
+    - trl
+    - peft
+    - datasets
     - vllm
     - ray
     - sentence-transformers

From e368c795e9501b70767455e931c30de86e53addf Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Wed, 27 May 2026 17:42:32 +0000
Subject: [PATCH 02/11] Fix recipe-template field names to match
 ImageSpecSchema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Five recipe-template.json files were using bogus field names
(recipeName/image/gpu/saturnVersion) introduced when the 12.4 images
were added in Aug 2025 and propagated forward to 12.9 and the AMD
image. The release-images builder uploads these as-is to S3 with
schema_version 2022.03.01, which causes the legacy pre_load in
saturn's BaseRecipeSchema to wrap them under "spec" — at which point
they fail ImageSpecSchema validation (Missing required field "name";
Unknown fields recipeName/gpu/image/saturnVersion).

Switch all five templates to the established shape
(name/description/hardware_type/supports). The AMD template uses
hardware_type=AMD; the others use gpu/cpu as appropriate, matching
the existing CUDA 11.8 / 12.1 templates.
---
 saturn-python-312-slim-gpu-12.9/recipe-template.json  | 11 +++++------
 saturn-python-312-slim/recipe-template.json           | 11 +++++------
 saturnbase-python-amd-gpu-devel/recipe-template.json  | 11 +++++------
 saturnbase-python-gpu-12.9/recipe-template.json       | 11 +++++------
 saturnbase-python-gpu-devel-12.9/recipe-template.json | 11 +++++------
 5 files changed, 25 insertions(+), 30 deletions(-)

diff --git a/saturn-python-312-slim-gpu-12.9/recipe-template.json b/saturn-python-312-slim-gpu-12.9/recipe-template.json
index dacfbea..892a292 100644
--- a/saturn-python-312-slim-gpu-12.9/recipe-template.json
+++ b/saturn-python-312-slim-gpu-12.9/recipe-template.json
@@ -1,7 +1,6 @@
 {
-    "recipeName": "saturn-python-312-slim-gpu-12.9",
-    "description": "Python 3.12 GPU slim image with CUDA 12.9 and minimal packages",
-    "image": "saturncloud/saturn-python-slim-gpu:2025.05.01-cuda129-python312",
-    "gpu": true,
-    "saturnVersion": "2025.05.01"
-}
\ No newline at end of file
+    "name": "saturn-python-312-slim-gpu-12.9",
+    "description": "Python 3.12 GPU slim image with CUDA 12.9 and minimal packages.",
+    "hardware_type": "gpu",
+    "supports": ["jupyterlab", "dask"]
+}
diff --git a/saturn-python-312-slim/recipe-template.json b/saturn-python-312-slim/recipe-template.json
index 6c6380e..ade91e8 100644
--- a/saturn-python-312-slim/recipe-template.json
+++ b/saturn-python-312-slim/recipe-template.json
@@ -1,7 +1,6 @@
 {
-    "recipeName": "saturn-python-312-slim",
-    "description": "Python 3.12 slim image with minimal packages",
-    "image": "saturncloud/saturn-python-slim:2025.05.01-python312",
-    "gpu": false,
-    "saturnVersion": "2025.05.01"
-}
\ No newline at end of file
+    "name": "saturn-python-312-slim",
+    "description": "Python 3.12 slim image with minimal packages.",
+    "hardware_type": "cpu",
+    "supports": ["jupyterlab", "dask"]
+}
diff --git a/saturnbase-python-amd-gpu-devel/recipe-template.json b/saturnbase-python-amd-gpu-devel/recipe-template.json
index e2e852e..e8e21fb 100644
--- a/saturnbase-python-amd-gpu-devel/recipe-template.json
+++ b/saturnbase-python-amd-gpu-devel/recipe-template.json
@@ -1,7 +1,6 @@
 {
-    "recipeName": "saturnbase-python-amd-gpu-devel",
-    "description": "Saturn base Python GPU devel image with rocm7 development tools",
-    "image": "saturncloud/saturnbase-python-amd-gpu-devel:2025.05.01",
-    "gpu": true,
-    "saturnVersion": "2025.05.01"
-}
\ No newline at end of file
+    "name": "saturnbase-python-amd-gpu-devel",
+    "description": "Saturn base Python AMD GPU devel image with ROCm 7 development tools.",
+    "hardware_type": "AMD",
+    "supports": ["jupyterlab", "dask"]
+}
diff --git a/saturnbase-python-gpu-12.9/recipe-template.json b/saturnbase-python-gpu-12.9/recipe-template.json
index d24f9a5..d37c424 100644
--- a/saturnbase-python-gpu-12.9/recipe-template.json
+++ b/saturnbase-python-gpu-12.9/recipe-template.json
@@ -1,7 +1,6 @@
 {
-    "recipeName": "saturnbase-python-gpu-12.9",
-    "description": "Saturn base Python GPU image with CUDA 12.9",
-    "image": "saturncloud/saturnbase-python-gpu-12.9:2025.05.01",
-    "gpu": true,
-    "saturnVersion": "2025.05.01"
-}
\ No newline at end of file
+    "name": "saturnbase-python-gpu-12.9",
+    "description": "Python-focused base image for Saturn GPU images, built on CUDA version 12.9. This image contains the minimal install required for the full functionality of Saturn Cloud on a GPU instance, including packages necessary to run Python, JupyterLab, and Dask.",
+    "hardware_type": "gpu",
+    "supports": ["jupyterlab", "dask"]
+}
diff --git a/saturnbase-python-gpu-devel-12.9/recipe-template.json b/saturnbase-python-gpu-devel-12.9/recipe-template.json
index ed969d0..a6bcd66 100644
--- a/saturnbase-python-gpu-devel-12.9/recipe-template.json
+++ b/saturnbase-python-gpu-devel-12.9/recipe-template.json
@@ -1,7 +1,6 @@
 {
-    "recipeName": "saturnbase-python-gpu-devel-12.9",
-    "description": "Saturn base Python GPU devel image with CUDA 12.9 development tools",
-    "image": "saturncloud/saturnbase-python-gpu-devel-12.9:2025.05.01",
-    "gpu": true,
-    "saturnVersion": "2025.05.01"
-}
\ No newline at end of file
+    "name": "saturnbase-python-gpu-devel-12.9",
+    "description": "Python-focused base image for Saturn GPU images, built on CUDA version 12.9 with development tools. This image contains the minimal install required for the full functionality of Saturn Cloud on a GPU instance, including packages necessary to run Python, JupyterLab, and Dask.",
+    "hardware_type": "gpu",
+    "supports": ["jupyterlab", "dask"]
+}

From 2421c3692246faa5373d12249021f21e5e567a4a Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Wed, 27 May 2026 22:33:55 +0000
Subject: [PATCH 03/11] saturn-python-llm: add axolotl 0.16.1 for Token Factory

The Token Factory fine-tuning service wraps axolotl: an in-pod shim
invokes `axolotl train <config>` via subprocess, so the `axolotl` CLI
entry point needs to be installed in the image's Python environment
and reachable on PATH.

Pinned exactly to 0.16.1 because Atlas's config renderer is keyed to
specific axolotl YAML field names that change across versions; a
loose pin or unpinned spec would silently break rendered configs on
the next axolotl release.

Context: saturncloud/saturn#6394.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 saturn-python-llm/environment.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/saturn-python-llm/environment.yml b/saturn-python-llm/environment.yml
index 92afeca..2b31677 100644
--- a/saturn-python-llm/environment.yml
+++ b/saturn-python-llm/environment.yml
@@ -40,6 +40,9 @@ dependencies:
     - trl
     - peft
     - datasets
+    # Pinned exactly: Token Factory's Atlas renderer is keyed to specific
+    # axolotl YAML field names that change across versions.
+    - axolotl==0.16.1
     - vllm
     - ray
     - sentence-transformers

From 1e970bc3f7cc19ca7308412d2ee26a13361b3e02 Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Thu, 28 May 2026 13:28:05 +0000
Subject: [PATCH 04/11] Pin python=3.13 in pytorch and tensorflow envs

Without an explicit python constraint, mamba now resolves python=3.14 on
conda-forge. That breaks both envs on release-2026.05.01:

- saturn-python-tensorflow: pip can't find tensorflow[and-cuda] for
  cp314 (no wheels yet) and snowflake-connector-python resolves to a
  cp314 wheel, so the pip step in `mamba env update` fails.
- saturn-python-pytorch: the multi-channel solve
  (pytorch + rapidsai + nvidia + conda-forge) blows up with
  "queue count overflow" and SIGABRTs after ~2.5h.

Pinning python=3.13 keeps us close to the latest while staying on a
version everything in the env list ships wheels/builds for. We can
revisit 3.14 once tensorflow et al catch up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 saturn-python-pytorch/environment.yml    | 2 +-
 saturn-python-tensorflow/environment.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/saturn-python-pytorch/environment.yml b/saturn-python-pytorch/environment.yml
index b9cf4b8..5186efc 100644
--- a/saturn-python-pytorch/environment.yml
+++ b/saturn-python-pytorch/environment.yml
@@ -23,7 +23,7 @@ dependencies:
     - py-opencv
     - pyarrow
     - python-graphviz
-    - python
+    - python=3.13
     - pytorch::pytorch
     - s3fs
     - setuptools
diff --git a/saturn-python-tensorflow/environment.yml b/saturn-python-tensorflow/environment.yml
index eecf295..612a7ac 100644
--- a/saturn-python-tensorflow/environment.yml
+++ b/saturn-python-tensorflow/environment.yml
@@ -17,7 +17,7 @@ dependencies:
     - pip
     - prefect
     - pyarrow
-    - python
+    - python=3.13
     - python-graphviz
     - s3fs
     - setuptools

From 551d3391fae7410c4959b3c50fcea69e53fbfc3d Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Thu, 28 May 2026 15:28:06 +0000
Subject: [PATCH 05/11] saturn-python-pytorch: move torch to PyPI cu129, drop
 pytorch conda channel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pytorch conda channel is frozen at 2.5.1 (Oct 2024 — the team
announced 2.5 as the last release on the channel) and has no
cp313 builds. With the python=3.13 pin we landed in #471, the conda
solve for `pytorch::pytorch` has no compatible build to pick from the
frozen channel. The rapidsai + pytorch + nvidia + conda-forge channel mix
was also what caused the previous solve to hit `queue count overflow`
and SIGABRT after 2h41m.

Switch torch / torchvision / torchaudio to PyPI cu129 wheels — pip
installs them after the conda env update, so we get the modern PyTorch
2.11 + CUDA 12.9 stack against the matching gpu-12.9 base image. Drop
the pytorch + nvidia + rapidsai conda channels and the pytorch::*
deps. Drop dask-cuda along with rapidsai (it's the only thing here
that needed that channel).

Pairs with a release-images change to point the
saturn-python-pytorch build at saturnbase-python-gpu-12.9 instead of
saturnbase-python-gpu-12.1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 saturn-python-pytorch/environment.yml | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/saturn-python-pytorch/environment.yml b/saturn-python-pytorch/environment.yml
index 5186efc..c469d8b 100644
--- a/saturn-python-pytorch/environment.yml
+++ b/saturn-python-pytorch/environment.yml
@@ -1,15 +1,11 @@
 name: saturn
 channels:
-    - pytorch
-    - rapidsai
-    - nvidia
-    - nodefaults
     - conda-forge
+    - nodefaults
 dependencies:
+    - python=3.13
     - blas=*=mkl
     - bokeh
-    - pytorch::pytorch-cuda=12.1
-    - dask-cuda
     - dask
     - fastai
     - fsspec
@@ -23,15 +19,16 @@ dependencies:
     - py-opencv
     - pyarrow
     - python-graphviz
-    - python=3.13
-    - pytorch::pytorch
     - s3fs
     - setuptools
     - tensorboard
-    - pytorch::torchaudio
-    - pytorch::torchvision
     - pynvml
     - pip:
+          - --extra-index-url
+          - https://download.pytorch.org/whl/cu129
+          - torch
+          - torchvision
+          - torchaudio
           - dask-saturn
           - saturn-client
           - saturnfs

From 85c94ac3b09afcfef77c49c10f0671bd57ec7ac9 Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Thu, 28 May 2026 17:57:47 +0000
Subject: [PATCH 06/11] Pin python=3.13 across all py images, fix pytorch
 index-url syntax

Mamba was resolving python=3.14 on conda-forge for any env without an
explicit python pin (3.14 released 2025-10-07). #471 already pinned
pytorch and tensorflow. This finishes the sweep:

- saturn-python: 3.11 -> 3.13.
- saturn-python-rapids: unpinned -> 3.13 (this image was on a path to
  hit the same 3.14 problem next build).
- saturn-python-llm: 3.11 -> 3.13. Also drops pytorch/nvidia conda
  channels and the conda pytorch/pytorch-cuda/cuda-toolkit deps for
  the same reason the saturn-python-pytorch image did in #472: the
  pytorch conda channel is frozen at 2.5.1 and has no cp313 builds.
  torch/torchvision/torchaudio move to pip via the PyPI cu129 index.
  flash_attn URL swaps cp311 -> cp313 (same build family already
  publishes a cp313 wheel at that tag).
- saturn-python-pytorch: collapses the two-line --extra-index-url
  pip arg into the correct single-line form. The split-line form is
  not the conda env yml pip-args grammar -- pip parses it as a
  bare --extra-index-url with no value.

Leaving saturn-python-312-slim* alone: their names encode python312
and they're already pinned to 3.12. R images keep python=3.11 since
python there is secondary tooling, not the image purpose.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 saturn-python-llm/environment.yml     | 15 +++++++--------
 saturn-python-pytorch/environment.yml | 16 ++++++----------
 saturn-python-rapids/environment.yml  |  2 +-
 saturn-python/environment.yml         |  2 +-
 4 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/saturn-python-llm/environment.yml b/saturn-python-llm/environment.yml
index 2b31677..74d1bf0 100644
--- a/saturn-python-llm/environment.yml
+++ b/saturn-python-llm/environment.yml
@@ -1,14 +1,9 @@
 name: saturn
 channels:
-  - pytorch
-  - nvidia
   - conda-forge
-  - defaults
+  - nodefaults
 dependencies:
-  - python=3.11
-  - cuda-toolkit
-  - pytorch
-  - pytorch-cuda
+  - python=3.13
   - transformers
   - tokenizers
   - numpy
@@ -32,6 +27,10 @@ dependencies:
   - ipykernel
   - pip
   - pip:
+    - --extra-index-url https://download.pytorch.org/whl/cu129
+    - torch
+    - torchvision
+    - torchaudio
     # Fine-tuning stack. unsloth pulls trl/peft/datasets transitively, but we
     # declare them explicitly so the image's training API surface is stable
     # against unsloth version bumps. The Token Factory fine-tune training
@@ -50,7 +49,7 @@ dependencies:
     - bitsandbytes
     - auto-gptq
     - autoawq
-    - https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2%2Bcu12torch2.7cxx11abiTRUE-cp311-cp311-linux_x86_64.whl
+    - https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2%2Bcu12torch2.7cxx11abiTRUE-cp313-cp313-linux_x86_64.whl
     - xformers
     - gpustat
     - nvidia-ml-py
diff --git a/saturn-python-pytorch/environment.yml b/saturn-python-pytorch/environment.yml
index 5186efc..f5fb7b9 100644
--- a/saturn-python-pytorch/environment.yml
+++ b/saturn-python-pytorch/environment.yml
@@ -1,15 +1,11 @@
 name: saturn
 channels:
-    - pytorch
-    - rapidsai
-    - nvidia
-    - nodefaults
     - conda-forge
+    - nodefaults
 dependencies:
+    - python=3.13
     - blas=*=mkl
     - bokeh
-    - pytorch::pytorch-cuda=12.1
-    - dask-cuda
     - dask
     - fastai
     - fsspec
@@ -23,15 +19,15 @@ dependencies:
     - py-opencv
     - pyarrow
     - python-graphviz
-    - python=3.13
-    - pytorch::pytorch
     - s3fs
     - setuptools
     - tensorboard
-    - pytorch::torchaudio
-    - pytorch::torchvision
     - pynvml
     - pip:
+          - --extra-index-url https://download.pytorch.org/whl/cu129
+          - torch
+          - torchvision
+          - torchaudio
           - dask-saturn
           - saturn-client
           - saturnfs
diff --git a/saturn-python-rapids/environment.yml b/saturn-python-rapids/environment.yml
index 9a48ff7..3e1b995 100644
--- a/saturn-python-rapids/environment.yml
+++ b/saturn-python-rapids/environment.yml
@@ -21,7 +21,7 @@ dependencies:
     - prefect
     - pyarrow
     - python-graphviz
-    - python
+    - python=3.13
     - rapids
     - s3fs
     - scikit-learn
diff --git a/saturn-python/environment.yml b/saturn-python/environment.yml
index b7513db..3c9e968 100644
--- a/saturn-python/environment.yml
+++ b/saturn-python/environment.yml
@@ -14,7 +14,7 @@ dependencies:
     - pandas
     - pip
     - pyarrow
-    - python=3.11
+    - python=3.13
     - python-graphviz
     - s3fs
     - scikit-learn

From a739062571b48e407a705a884877164258dc0092 Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Thu, 28 May 2026 19:13:30 +0000
Subject: [PATCH 07/11] saturn-python-llm: pin python=3.12, drop
 auto-gptq/autoawq, bump flash_attn

The python=3.13 path is blocked by axolotl 0.16.1's unconditional
zstandard==0.22.0 transitive pin -- that wheel only ships cp310-cp312,
no cp313. Stepping down to python=3.12 keeps the rest of the axolotl
0.16.1 pin chain (torch==2.8.0, transformers==5.5.0, accelerate==1.13.0,
bitsandbytes==0.49.1, datasets==4.5.0, trl==0.29.0) resolvable from
pre-built wheels.

Also:

- Drop auto-gptq and autoawq. auto-gptq's sdist runs `import torch`
  at build-deps phase before torch is installed, breaking the env.
  autoawq has the same kind of issue. Neither is referenced by any
  saturncloud code; vllm handles GPTQ/AWQ checkpoint loading via
  compressed-tensors without these libs.
- Bump flash_attn to v2.8.3 with the cu12torch2.8 cp312 wheel. The
  cu12torch2.7 wheel was ABI-incompatible with torch 2.8.0 (undefined
  symbol _ZN3c104cuda9SetDeviceEa). v2.8.3 is the version axolotl
  0.16.1 itself wants under its flash-attn extra.

Verified locally: env builds cleanly, all heavy imports (torch,
transformers, axolotl, flash_attn, vllm, peft, trl, datasets,
accelerate, bitsandbytes) succeed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 saturn-python-llm/environment.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/saturn-python-llm/environment.yml b/saturn-python-llm/environment.yml
index 74d1bf0..c64f0b5 100644
--- a/saturn-python-llm/environment.yml
+++ b/saturn-python-llm/environment.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - nodefaults
 dependencies:
-  - python=3.13
+  - python=3.12
   - transformers
   - tokenizers
   - numpy
@@ -47,9 +47,7 @@ dependencies:
     - sentence-transformers
     - accelerate
     - bitsandbytes
-    - auto-gptq
-    - autoawq
-    - https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2%2Bcu12torch2.7cxx11abiTRUE-cp313-cp313-linux_x86_64.whl
+    - https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3%2Bcu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
     - xformers
     - gpustat
     - nvidia-ml-py

From faf7dabb50faf97dc3ab53d57fb16b177d40b216 Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Thu, 28 May 2026 19:32:56 +0000
Subject: [PATCH 08/11] saturn-python-rapids: bump cuda to 12.9, drop dask-sql,
 pin rapids>=26.02

Three independent failures in the existing env on python=3.13:

- cuda-version=12.0 was no longer in conda-forge (only 12.4+ ships now),
  so the previous CI build couldn't even resolve a CUDA package set.
  Bump to 12.9 to match the gpu-12.9 base we already have for the
  pytorch image; rapids' bundled CUDA libs don't actually need to match
  the runtime base.
- dask-sql is abandoned upstream and tops out at python 3.12 on
  conda-forge. With our python=3.13 sweep it pulled the solver into
  2021-era versions. No saturncloud code references dask-sql; users
  who want SQL-on-dask can pip-install it on demand.
- Leaving rapids unpinned resolved to 25.08, which embeds an older
  cuml that broke at import time against the newer scikit-learn 1.8
  that conda-forge ships (BaseEstimator._get_default_requests was
  renamed to _get_metadata_request). rapids>=26.02 resolves to 26.04
  with a cuml that matches.

Verified locally: env builds clean, all heavy imports (cudf, cuml,
cupy, dask, dask_ml, sklearn, pyarrow, cvxpy, prefect, numba) succeed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 saturn-python-rapids/environment.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/saturn-python-rapids/environment.yml b/saturn-python-rapids/environment.yml
index 3e1b995..31c9416 100644
--- a/saturn-python-rapids/environment.yml
+++ b/saturn-python-rapids/environment.yml
@@ -6,10 +6,9 @@ channels:
     - conda-forge
 dependencies:
     - bokeh
-    - cuda-version=12.0
+    - cuda-version=12.9
     - cvxpy
     - dask-ml
-    - dask-sql
     - dask
     - ipykernel
     - ipywidgets
@@ -22,7 +21,7 @@ dependencies:
     - pyarrow
     - python-graphviz
     - python=3.13
-    - rapids
+    - rapids>=26.02
     - s3fs
     - scikit-learn
     - scipy

From 621e308cd3f39f462b90d163bf10591bfbd1b219 Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Thu, 28 May 2026 20:08:20 +0000
Subject: [PATCH 09/11] saturn-python-llm: pin vllm==0.11.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI's pip resolver walks vllm back through versions looking for one that
satisfies the full constraint set (torch==2.8.0 from axolotl 0.16.1
narrows the window significantly). Locally it lands at 0.11.0; in the
build container it kept walking past 0.11.0's manylinux1 wheel and
eventually fell into vllm 0.5.x sdists, which try to call /usr/local/cuda/bin/nvcc
at metadata-extraction time — but the runtime base image doesn't ship nvcc.

Pinning to 0.11.0 (the version the local solve already lands on)
short-circuits the backtracking and keeps the resolution wheel-only.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 saturn-python-llm/environment.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/saturn-python-llm/environment.yml b/saturn-python-llm/environment.yml
index c64f0b5..b121af0 100644
--- a/saturn-python-llm/environment.yml
+++ b/saturn-python-llm/environment.yml
@@ -42,7 +42,10 @@ dependencies:
     # Pinned exactly: Token Factory's Atlas renderer is keyed to specific
     # axolotl YAML field names that change across versions.
     - axolotl==0.16.1
-    - vllm
+    # Pinned: vllm walks back through versions otherwise — axolotl 0.16.1 forces
+    # torch==2.8.0, and only 0.10–0.11 satisfy that. Pin to keep CI's pip from
+    # backtracking past wheel-only releases into 0.5.x sdists (which need nvcc).
+    - vllm==0.11.0
     - ray
     - sentence-transformers
     - accelerate

From f8031f864ad52bb067b3cb4c646617b35a3e3f32 Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Sat, 30 May 2026 17:51:09 +0000
Subject: [PATCH 10/11] Pin transformers <5 in saturn-python-llm so vLLM 0.11
 boots

vLLM 0.11.0 reads tokenizer.all_special_tokens_extended at startup, which
transformers 5.x removed -> AttributeError -> CrashLoopBackOff. The env was
leaving transformers unpinned and resolving to 5.5.0, breaking every vLLM
serve pod built on this image.

Pin transformers>=4.55,<5 (conda + repeated in the pip: block). 4.57.6 was
empirically verified to boot vLLM 0.11.0, load Qwen2.5-7B + a LoRA adapter,
and serve /v1/chat/completions.

axolotl 0.16.1 (kept exactly pinned: Token Factory's Atlas YAML renderer is
keyed to its field names) carries an over-strict transformers==5.5.0 metadata
pin that would drag transformers 5.x back in. It runs fine on transformers
4.57.x, so it is now
installed --no-deps in a separate Dockerfile step, with its real transitive
deps declared explicitly in environment.yml. The cross-constraint still holds:
axolotl 0.16.1 forces torch==2.8.0 and only vLLM 0.10-0.11 satisfy that, so
vLLM stays pinned at 0.11.0.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 saturn-python-llm/Dockerfile      |  6 ++++++
 saturn-python-llm/environment.yml | 35 +++++++++++++++++++++++++++----
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/saturn-python-llm/Dockerfile b/saturn-python-llm/Dockerfile
index 6fb0d73..335b23b 100644
--- a/saturn-python-llm/Dockerfile
+++ b/saturn-python-llm/Dockerfile
@@ -6,7 +6,13 @@ RUN sudo apt-get -qq --allow-releaseinfo-change update && \
     libgl1
 
 COPY environment.yml /tmp/environment.yml
+# axolotl 0.16.1 is installed --no-deps so its over-strict transformers==5.5.0
+# metadata pin cannot drag transformers 5.x in and break vLLM 0.11 at boot.
+# Its transitive deps are declared explicitly in environment.yml. This is a
+# separate step because a --no-deps line inside the env.yml pip: block would
+# apply to the whole block, suppressing deps for every pip entry.
 RUN mamba env update -n saturn --file /tmp/environment.yml && \
+    ${CONDA_DIR}/envs/saturn/bin/python -m pip install --no-deps axolotl==0.16.1 && \
     ${CONDA_DIR}/envs/saturn/bin/python -m ipykernel install \
         --name python3 \
         --display-name 'saturn (Python 3)' \
diff --git a/saturn-python-llm/environment.yml b/saturn-python-llm/environment.yml
index b121af0..bc0455f 100644
--- a/saturn-python-llm/environment.yml
+++ b/saturn-python-llm/environment.yml
@@ -4,7 +4,12 @@ channels:
   - nodefaults
 dependencies:
   - python=3.12
-  - transformers
+  # vLLM 0.11 boots only on transformers 4.x: transformers 5.x removed
+  # tokenizer.all_special_tokens_extended, which vLLM 0.11 reads at startup,
+  # so 5.x triggers AttributeError -> CrashLoopBackOff. axolotl 0.16.1 runs
+  # fine on transformers 4.57.x at runtime (its metadata pins 5.5.0, but that is
+  # over-strict; axolotl is installed --no-deps in the Dockerfile, see pip: notes).
+  - transformers>=4.55,<5
   - tokenizers
   - numpy
   - psutil
@@ -39,9 +44,31 @@ dependencies:
     - trl
     - peft
     - datasets
-    # Pinned exactly: Token Factory's Atlas renderer is keyed to specific
-    # axolotl YAML field names that change across versions.
-    - axolotl==0.16.1
+    # Held below transformers 5 to match the conda transformers pin above (the
+    # pip: block is run through pip by `mamba env update`, so repeat the bound
+    # here to stop pip backtracking to 5.x).
+    - transformers>=4.55,<5
+    # axolotl 0.16.1 transitive deps. axolotl itself is installed separately in
+    # the Dockerfile with --no-deps, because its metadata carries an over-strict
+    # `transformers==5.5.0` pin that would otherwise drag transformers 5.x back
+    # in and break vLLM 0.11 at boot. axolotl runs fine on transformers 4.57.x;
+    # its real transitive deps are declared here (and via unsloth/vllm/trl/peft).
+    # NOTE: --no-deps cannot be scoped to a single entry inside this block (pip
+    # applies it to the whole `pip install` invocation), so axolotl is pulled
+    # out into its own `pip install --no-deps axolotl==0.16.1` Dockerfile step.
+    - liger-kernel==0.7.0
+    - lm_eval==0.4.11
+    - fla-core==0.4.1
+    - flash-linear-attention==0.4.1
+    - torchao==0.17.0
+    - optimum==1.16.2
+    - trackio>=0.16.1
+    - schedulefree==1.4.1
+    - axolotl-contribs-lgpl==0.0.7
+    - axolotl-contribs-mit==0.0.6
+    - openenv-core==0.1.0
+    - mistral-common==1.11.0
+    - modal==1.3.0.post1
     # Pinned: vllm walks back through versions otherwise — axolotl 0.16.1 forces
     # torch==2.8.0, and only 0.10–0.11 satisfy that. Pin to keep CI's pip from
     # backtracking past wheel-only releases into 0.5.x sdists (which need nvcc).

From b8928894c7fee1e911e8bde372ccd076f2343cad Mon Sep 17 00:00:00 2001
From: hugo <hugo@saturncloud.io>
Date: Thu, 4 Jun 2026 22:04:41 +0000
Subject: [PATCH 11/11] Add saturn-python-vllm + saturn-python-axolotl (split
 from saturn-python-llm)

saturn-python-llm tried to be one image for both vLLM serving and axolotl
fine-tuning, but the dep stacks are incompatible: vLLM 0.11 needs transformers<5
(5.x removed tokenizer.all_special_tokens_extended -> CrashLoopBackOff), while
axolotl 0.16.1 needs the transformers 5.x API (Trainer.create_optimizer(model=)
is 5.x-only; on 4.57 training dies inside the loop). Split by engine, extensible
to future inference engines / fine-tuning frameworks:

- saturn-python-vllm:    inference (vLLM, transformers<5; axolotl deps removed,
                         no --no-deps hack)
- saturn-python-axolotl: fine-tuning (axolotl installed WITH deps, so it pulls
                         the correct transformers 5.5 / datasets 4.5 / trl 0.29
                         / hf-hub>=1 stack; flash-attn + deepspeed + mlflow extras)

Both build on the cu129 GPU base. Registered for building in saturncloud/release-images
(PR adds them to data_science.py / main_release.py / the build matrix).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 saturn-python-axolotl/Dockerfile           | 25 +++++++++
 saturn-python-axolotl/Makefile             |  9 ++++
 saturn-python-axolotl/environment.yml      | 53 ++++++++++++++++++
 saturn-python-axolotl/recipe-template.json |  6 +++
 saturn-python-vllm/Dockerfile              | 20 +++++++
 saturn-python-vllm/Makefile                |  9 ++++
 saturn-python-vllm/environment.yml         | 62 ++++++++++++++++++++++
 saturn-python-vllm/recipe-template.json    |  6 +++
 8 files changed, 190 insertions(+)
 create mode 100644 saturn-python-axolotl/Dockerfile
 create mode 100644 saturn-python-axolotl/Makefile
 create mode 100644 saturn-python-axolotl/environment.yml
 create mode 100644 saturn-python-axolotl/recipe-template.json
 create mode 100644 saturn-python-vllm/Dockerfile
 create mode 100644 saturn-python-vllm/Makefile
 create mode 100644 saturn-python-vllm/environment.yml
 create mode 100644 saturn-python-vllm/recipe-template.json

diff --git a/saturn-python-axolotl/Dockerfile b/saturn-python-axolotl/Dockerfile
new file mode 100644
index 0000000..f4f04e1
--- /dev/null
+++ b/saturn-python-axolotl/Dockerfile
@@ -0,0 +1,25 @@
+ARG SATURNBASE_GPU_IMAGE
+FROM ${SATURNBASE_GPU_IMAGE}
+
+RUN sudo apt-get -qq --allow-releaseinfo-change update && \
+    sudo apt-get -qq install --yes --no-install-recommends \
+    libgl1
+
+COPY environment.yml /tmp/environment.yml
+# Unlike the serving image (saturn-python-llm), axolotl is installed WITH its
+# deps here: this image has no vLLM, so there is no transformers<5 constraint to
+# protect, and axolotl 0.16.1 needs the transformers 5.x API at runtime. Letting
+# it resolve its own tree (transformers 5.5, datasets 4.5, trl 0.29, hf-hub>=1,
+# accelerate 1.13, ...) is what makes training actually work. axolotl is declared
+# in environment.yml's pip: block with its extras, so a normal env update pulls
+# everything; no separate --no-deps step.
+RUN mamba env update -n saturn --file /tmp/environment.yml && \
+    ${CONDA_DIR}/envs/saturn/bin/python -m ipykernel install \
+        --name python3 \
+        --display-name 'saturn (Python 3)' \
+        --prefix=${CONDA_DIR} && \
+    ${CONDA_DIR}/bin/conda clean -afy && \
+    find ${CONDA_DIR} -type f,l -name '*.pyc' -delete && \
+    find ${CONDA_DIR} -type f,l -name '*.a' -delete && \
+    find ${CONDA_DIR} -type f,l -name '*.js.map' -delete
+RUN echo '' > ${CONDA_DIR}/envs/saturn/conda-meta/history
diff --git a/saturn-python-axolotl/Makefile b/saturn-python-axolotl/Makefile
new file mode 100644
index 0000000..aba3758
--- /dev/null
+++ b/saturn-python-axolotl/Makefile
@@ -0,0 +1,9 @@
+include .env_deps  # build inputs; presumably defines SATURNBASE_GPU_IMAGE and IMAGE (file not shown) — confirm
+export  # no arguments: export every make variable to the recipe's environment
+
+build_image:  # rebuilds from scratch (--no-cache) and tags the result ${IMAGE}
+	docker build \
+		--no-cache \
+		--build-arg SATURNBASE_GPU_IMAGE=${SATURNBASE_GPU_IMAGE} \
+		-t ${IMAGE} \
+		.
diff --git a/saturn-python-axolotl/environment.yml b/saturn-python-axolotl/environment.yml
new file mode 100644
index 0000000..54f933c
--- /dev/null
+++ b/saturn-python-axolotl/environment.yml
@@ -0,0 +1,53 @@
+name: saturn
+channels:
+  - conda-forge
+  - nodefaults
+dependencies:
+  - python=3.12
+  # TRAINING-ONLY image (axolotl). Split out from the former saturn-python-llm:
+  # serving must pin transformers<5 for vLLM 0.11, but axolotl 0.16.1 is
+  # hard-coupled to the transformers 5.x API (e.g. Trainer.create_optimizer(model=)
+  # — a 5.x-only signature; on 4.57 training dies inside the loop). The two dep
+  # stacks are incompatible in one env, so serving lives in saturn-python-vllm and
+  # training lives here. Because there is NO vLLM here, axolotl is installed WITH
+  # its deps (see Dockerfile) — no transformers pin, no --no-deps hack — and it
+  # pulls the correct transformers 5.5 / datasets 4.5 / trl 0.29 / hf-hub>=1 set.
+  - numpy
+  - psutil
+  - pandas
+  - tqdm
+  - click
+  - rich
+  - tensorboard
+  - wandb
+  - ipykernel
+  - pip
+  - pip:
+    - --extra-index-url https://download.pytorch.org/whl/cu129
+    # axolotl 0.16.1 pins torch==2.8.0; install it from the cu129 index so the
+    # GPU build is used (and so the flash-attn wheel below matches torch 2.8).
+    - torch==2.8.0
+    - torchvision
+    - torchaudio
+    # The whole fine-tuning stack. Unlike the serving image, we let axolotl
+    # resolve its own dependency tree (transformers 5.5.0, datasets 4.5.0,
+    # trl 0.29.0, accelerate 1.13.0, peft, hf-hub>=1, etc.) — installed WITH
+    # deps by the Dockerfile's `mamba env update`. [flash-attn] + [deepspeed] for
+    # real multi-GPU LoRA/full fine-tunes; [mlflow] for experiment tracking.
+    - axolotl[flash-attn,deepspeed,mlflow]==0.16.1
+    # flash-attn's build wants torch present at install time; ship the prebuilt
+    # cu12/torch2.8 wheel so it doesn't compile from source (slow, needs nvcc).
+    - https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3%2Bcu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
+    # Saturn workspace + ops basics (mirror the serving image's tail).
+    - gpustat
+    - black
+    - isort
+    - mypy
+    - pytest
+    - saturn-client
+    # The Token Factory inline training script (pdc/scripts/tf/finetune.py) only
+    # needs requests + PyYAML at runtime; both come in transitively (requests via
+    # axolotl/saturn-client, PyYAML via axolotl). Listed explicitly for clarity and
+    # in case axolotl ever drops them.
+    - requests
+    - pyyaml
diff --git a/saturn-python-axolotl/recipe-template.json b/saturn-python-axolotl/recipe-template.json
new file mode 100644
index 0000000..554d645
--- /dev/null
+++ b/saturn-python-axolotl/recipe-template.json
@@ -0,0 +1,6 @@
+{
+    "name": "saturn-python-axolotl",
+    "description": "Fine-tuning LLMs with axolotl (transformers 5 training stack)",
+    "hardware_type": "gpu",
+    "supports": ["jupyterlab", "dask"]
+}
diff --git a/saturn-python-vllm/Dockerfile b/saturn-python-vllm/Dockerfile
new file mode 100644
index 0000000..fae3dd9
--- /dev/null
+++ b/saturn-python-vllm/Dockerfile
@@ -0,0 +1,22 @@
+ARG SATURNBASE_GPU_IMAGE
+FROM ${SATURNBASE_GPU_IMAGE}
+
+# Install OS packages and drop the apt lists in the same layer to keep it small.
+RUN sudo apt-get -qq --allow-releaseinfo-change update && \
+    sudo apt-get -qq install --yes --no-install-recommends \
+    libgl1 && \
+    sudo rm -rf /var/lib/apt/lists/*
+
+COPY environment.yml /tmp/environment.yml
+# vLLM serving image. No axolotl here (it lives in saturn-python-axolotl), so the
+# former --no-deps axolotl install step is gone and a plain env update suffices.
+RUN mamba env update -n saturn --file /tmp/environment.yml && \
+    ${CONDA_DIR}/envs/saturn/bin/python -m ipykernel install \
+        --name python3 \
+        --display-name 'saturn (Python 3)' \
+        --prefix=${CONDA_DIR} && \
+    ${CONDA_DIR}/bin/conda clean -afy && \
+    find ${CONDA_DIR} -type f,l -name '*.pyc' -delete && \
+    find ${CONDA_DIR} -type f,l -name '*.a' -delete && \
+    find ${CONDA_DIR} -type f,l -name '*.js.map' -delete
+RUN echo '' > ${CONDA_DIR}/envs/saturn/conda-meta/history
diff --git a/saturn-python-vllm/Makefile b/saturn-python-vllm/Makefile
new file mode 100644
index 0000000..aba3758
--- /dev/null
+++ b/saturn-python-vllm/Makefile
@@ -0,0 +1,9 @@
+include .env_deps  # build inputs; presumably defines SATURNBASE_GPU_IMAGE and IMAGE (file not shown) — confirm
+export  # no arguments: export every make variable to the recipe's environment
+
+build_image:  # rebuilds from scratch (--no-cache) and tags the result ${IMAGE}
+	docker build \
+		--no-cache \
+		--build-arg SATURNBASE_GPU_IMAGE=${SATURNBASE_GPU_IMAGE} \
+		-t ${IMAGE} \
+		.
diff --git a/saturn-python-vllm/environment.yml b/saturn-python-vllm/environment.yml
new file mode 100644
index 0000000..e1379b8
--- /dev/null
+++ b/saturn-python-vllm/environment.yml
@@ -0,0 +1,62 @@
+name: saturn
+channels:
+  - conda-forge
+  - nodefaults
+dependencies:
+  - python=3.12
+  # INFERENCE/SERVING image (vLLM). Split out from the former saturn-python-llm
+  # (which tried to be one image for both training and serving). vLLM 0.11 boots
+  # only on transformers 4.x: transformers 5.x removed
+  # tokenizer.all_special_tokens_extended, which vLLM 0.11 reads at startup, so
+  # 5.x -> AttributeError -> CrashLoopBackOff. Fine-tuning (axolotl, which needs
+  # the transformers 5.x API) now lives in saturn-python-axolotl, so this image
+  # is free to pin transformers<5 without breaking training.
+  - transformers>=4.55,<5
+  - tokenizers
+  - numpy
+  - psutil
+  - pydantic
+  - fastapi
+  - uvicorn
+  - aiohttp
+  - requests
+  - typing-extensions
+  - packaging
+  - filelock
+  - matplotlib
+  - pandas
+  - seaborn
+  - tqdm
+  - click
+  - rich
+  - tensorboard
+  - wandb
+  - ipykernel
+  - pip
+  - pip:
+    - --extra-index-url https://download.pytorch.org/whl/cu129
+    - torch==2.8.0  # explicit: the flash-attn wheel below is built for torch 2.8; bump both together
+    - torchvision
+    - torchaudio
+    # Held below transformers 5 to match the conda pin above (the pip: block is
+    # run through pip by `mamba env update`, so repeat the bound to stop pip
+    # backtracking to 5.x).
+    - transformers>=4.55,<5
+    # vLLM serving stack.
+    - vllm==0.11.0
+    - ray
+    - sentence-transformers
+    # peft so vLLM can load the LoRA adapters Token Factory fine-tunes produce.
+    - peft
+    - accelerate
+    - bitsandbytes
+    - https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3%2Bcu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
+    - xformers
+    - gpustat
+    - nvidia-ml-py
+    - huggingface-hub
+    - black
+    - isort
+    - mypy
+    - pytest
+    - saturn-client
diff --git a/saturn-python-vllm/recipe-template.json b/saturn-python-vllm/recipe-template.json
new file mode 100644
index 0000000..9409275
--- /dev/null
+++ b/saturn-python-vllm/recipe-template.json
@@ -0,0 +1,6 @@
+{
+    "name": "saturn-python-vllm",
+    "description": "Serving LLMs with vLLM (inference; transformers 4 stack)",
+    "hardware_type": "gpu",
+    "supports": ["jupyterlab", "dask"]
+}