From 04f1ea684f9be5bb07a958fa74a4c3e59e20b14a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 21 Apr 2026 12:10:53 +0000 Subject: [PATCH 01/13] perftest: add Dragonfly SDK proxy performance test harness Introduce a complete performance testing framework for benchmarking Nydus image loading through Dragonfly's SDK proxy mode. The harness builds a minimal container image containing static nydusd, nydusctl, crane, and a Go workload that mounts a Nydus image via FUSE and reads all files in parallel. The GitHub Actions workflow orchestrates a full Dragonfly cluster (MySQL, Redis, manager, scheduler, dfdaemon) and runs the benchmark against configurable Nydus images. Results are captured in JSON format with assertions for successful workload completion and non-zero bytes read. Key components: - Static musl-based perftest image with all required binaries - Go workload for parallel file reading and metrics collection - GitHub Actions workflow for CI integration - Makefile target for local image building - Configuration templates and documentation Signed-off-by: Peng Tao --- .github/workflows/perftest.yml | 308 +++++++++++++++++++++++++++++ Makefile | 8 + misc/perftest/.dockerignore | 5 + misc/perftest/Dockerfile | 112 +++++++++++ misc/perftest/README.md | 178 +++++++++++++++++ misc/perftest/config.template.json | 42 ++++ misc/perftest/entrypoint.sh | 234 ++++++++++++++++++++++ misc/perftest/fetch-bootstrap.sh | 75 +++++++ misc/perftest/workload/go.mod | 3 + misc/perftest/workload/main.go | 232 ++++++++++++++++++++++ 10 files changed, 1197 insertions(+) create mode 100644 .github/workflows/perftest.yml create mode 100644 misc/perftest/.dockerignore create mode 100644 misc/perftest/Dockerfile create mode 100644 misc/perftest/README.md create mode 100644 misc/perftest/config.template.json create mode 100755 misc/perftest/entrypoint.sh create mode 100755 misc/perftest/fetch-bootstrap.sh create mode 100644 misc/perftest/workload/go.mod create mode 100644 
misc/perftest/workload/main.go diff --git a/.github/workflows/perftest.yml b/.github/workflows/perftest.yml new file mode 100644 index 00000000000..65967ae4799 --- /dev/null +++ b/.github/workflows/perftest.yml @@ -0,0 +1,308 @@ +name: Perftest Image (Dragonfly SDK Proxy) + +on: + push: + branches: ["**", "stable/**"] + paths-ignore: [ '**.md', '**.png', '**.jpg', '**.svg', '**/docs/**' ] + pull_request: + branches: ["**", "stable/**"] + paths-ignore: [ '**.md', '**.png', '**.jpg', '**.svg', '**/docs/**' ] + workflow_dispatch: + inputs: + nydus_image: + description: 'Nydus image to benchmark' + required: false + default: 'ghcr.io/dragonflyoss/image-service/nginx:nydus-latest' + +permissions: + contents: read + +env: + DRAGONFLY_VERSION: "2.4.3" + CLIENT_VERSION: "1.3.3" + PERFTEST_IMAGE: "nydus-perftest:ci" + +jobs: + build-image: + name: Build perftest image + runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} + timeout-minutes: 60 + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Cache cargo registry / git via Buildx + uses: actions/cache@v4 + with: + path: | + ~/.cache/buildx-perftest + key: buildx-perftest-${{ hashFiles('Cargo.lock', 'misc/perftest/Dockerfile') }} + restore-keys: | + buildx-perftest- + + - name: Build perftest image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: misc/perftest/Dockerfile + tags: ${{ env.PERFTEST_IMAGE }} + load: true + build-args: | + RUST_TARGET=x86_64-unknown-linux-musl + cache-from: type=local,src=~/.cache/buildx-perftest + cache-to: type=local,dest=~/.cache/buildx-perftest,mode=max + + - name: Verify image bundles required binaries + run: | + docker run --rm --entrypoint /usr/local/bin/nydusd ${PERFTEST_IMAGE} --version + docker run --rm --entrypoint /usr/local/bin/nydusctl ${PERFTEST_IMAGE} --help | head -5 + docker run --rm --entrypoint /usr/local/bin/crane ${PERFTEST_IMAGE} version + docker run --rm --entrypoint /usr/local/bin/workload ${PERFTEST_IMAGE} --help 2>&1 | head -10 || true + # Sanity-check the binaries are static (no dynamic linker references). + docker run --rm --entrypoint /bin/sh ${PERFTEST_IMAGE} -c \ + 'for b in /usr/local/bin/nydusd /usr/local/bin/nydusctl /usr/local/bin/workload; do + echo "=== $b ===" + file "$b" 2>/dev/null || true + ldd "$b" 2>&1 || true + done' + + - name: Save image + run: | + docker save "${PERFTEST_IMAGE}" -o /tmp/perftest-image.tar + + - name: Upload image artifact + uses: actions/upload-artifact@v6 + with: + name: perftest-image + path: /tmp/perftest-image.tar + retention-days: 1 + + dragonfly-download: + name: Download Dragonfly binaries + runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} + timeout-minutes: 10 + steps: + - name: Cache Dragonfly binaries + id: cache-dragonfly + uses: actions/cache@v4 + with: + path: /tmp/dragonfly-bin + key: dragonfly-${{ env.DRAGONFLY_VERSION }}-client-${{ env.CLIENT_VERSION }}-linux-amd64 + + - name: Download Dragonfly server binaries + if: steps.cache-dragonfly.outputs.cache-hit != 'true' + run: | + mkdir -p /tmp/dragonfly-bin + wget -q -O /tmp/dragonfly-server.tar.gz \ + "https://github.com/dragonflyoss/dragonfly/releases/download/v${DRAGONFLY_VERSION}/dragonfly-${DRAGONFLY_VERSION}-linux-amd64.tar.gz" + tar -xzf /tmp/dragonfly-server.tar.gz -C /tmp/dragonfly-bin manager scheduler + rm 
/tmp/dragonfly-server.tar.gz + + - name: Download Dragonfly client binaries + if: steps.cache-dragonfly.outputs.cache-hit != 'true' + run: | + wget -q -O /tmp/dragonfly-client.tar.gz \ + "https://github.com/dragonflyoss/client/releases/download/v${CLIENT_VERSION}/dragonfly-client-v${CLIENT_VERSION}-x86_64-unknown-linux-musl.tar.gz" + tar -xzf /tmp/dragonfly-client.tar.gz --strip-components=1 -C /tmp/dragonfly-bin + rm /tmp/dragonfly-client.tar.gz + + - name: Upload Dragonfly Binaries + uses: actions/upload-artifact@v6 + with: + name: dragonfly-artifact + path: /tmp/dragonfly-bin + retention-days: 1 + + perftest-run: + name: Run perftest against Dragonfly + runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} + needs: [build-image, dragonfly-download] + timeout-minutes: 30 + env: + NYDUS_IMAGE: ${{ github.event.inputs.nydus_image || 'ghcr.io/dragonflyoss/image-service/nginx:nydus-latest' }} + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Download perftest image + uses: actions/download-artifact@v7 + with: + name: perftest-image + path: /tmp + + - name: Load perftest image + run: | + docker load -i /tmp/perftest-image.tar + docker images | grep nydus-perftest + + - name: Download Dragonfly artifacts + uses: actions/download-artifact@v7 + with: + name: dragonfly-artifact + path: /usr/local/bin + + - name: Install Dragonfly binaries + run: | + sudo chmod +x /usr/local/bin/manager /usr/local/bin/scheduler /usr/local/bin/dfdaemon + + # ---------- Dragonfly control plane (mirrors e2e-dragonfly.yml) ---------- + - name: Start MySQL + run: | + docker run -d --name mysql \ + -e MYSQL_ROOT_PASSWORD=dragonfly \ + -e MYSQL_DATABASE=manager \ + -p 3306:3306 \ + mysql:8 + for i in $(seq 1 60); do + if docker exec mysql mysqladmin ping -h 127.0.0.1 -u root -pdragonfly --silent 2>/dev/null; then + echo "MySQL is ready"; break + fi + if [ "$i" -eq 60 ]; then echo "ERROR: MySQL failed"; docker logs mysql; exit 1; fi + sleep 2 + done + + - name: Start Redis + 
run: | + docker run -d --name redis -p 6379:6379 redis:latest + for i in $(seq 1 30); do + if docker exec redis redis-cli ping 2>/dev/null | grep -q PONG; then + echo "Redis is ready"; break + fi + if [ "$i" -eq 30 ]; then echo "ERROR: Redis failed"; docker logs redis; exit 1; fi + sleep 1 + done + + - name: Setup Dragonfly configs + run: | + sudo mkdir -p /etc/dragonfly + sudo cp misc/dragonfly/manager.yaml /etc/dragonfly/manager.yaml + sudo cp misc/dragonfly/scheduler.yaml /etc/dragonfly/scheduler.yaml + sudo cp misc/dragonfly/dfdaemon.yaml /etc/dragonfly/dfdaemon.yaml + sudo mkdir -p /tmp/dragonfly/logs /tmp/dragonfly/cache /tmp/dragonfly/storage + sudo chmod 777 /tmp/dragonfly/logs /tmp/dragonfly/cache /tmp/dragonfly/storage + mkdir -p /tmp/perftest-results + + - name: Start Manager + run: | + sudo nohup /usr/local/bin/manager --config /etc/dragonfly/manager.yaml \ + > /tmp/dragonfly/logs/manager.log 2>&1 & + for i in $(seq 1 60); do + if curl -fsS http://127.0.0.1:8080/healthy >/dev/null 2>&1; then + echo "Manager is ready"; break + fi + if [ "$i" -eq 60 ]; then + echo "ERROR: Manager failed"; sudo cat /tmp/dragonfly/logs/manager.log || true; exit 1 + fi + sleep 2 + done + + - name: Start Scheduler + run: | + sudo nohup /usr/local/bin/scheduler --config /etc/dragonfly/scheduler.yaml \ + > /tmp/dragonfly/logs/scheduler.log 2>&1 & + # Scheduler doesn't expose a friendly health endpoint here; just give it a moment + # and verify the gRPC port is listening. 
+ for i in $(seq 1 30); do + if ss -tln 2>/dev/null | grep -q ':8002'; then + echo "Scheduler is listening on :8002"; break + fi + if [ "$i" -eq 30 ]; then + echo "ERROR: Scheduler failed"; sudo cat /tmp/dragonfly/logs/scheduler.log || true; exit 1 + fi + sleep 2 + done + + - name: Start dfdaemon + run: | + sudo nohup /usr/local/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml \ + > /tmp/dragonfly/logs/dfdaemon.log 2>&1 & + for i in $(seq 1 60); do + if ss -tln 2>/dev/null | grep -q ':4001'; then + echo "dfdaemon proxy is listening on :4001"; break + fi + if [ "$i" -eq 60 ]; then + echo "ERROR: dfdaemon failed"; sudo cat /tmp/dragonfly/logs/dfdaemon.log || true; exit 1 + fi + sleep 2 + done + + # ---------- Run the perftest container ---------- + - name: Run perftest container + run: | + mkdir -p /tmp/perftest-results + # Allow the container to reach the host's dfdaemon via host.docker.internal. + docker run --rm \ + --name nydus-perftest-run \ + --add-host host.docker.internal:host-gateway \ + --privileged \ + --device /dev/fuse \ + -e NYDUS_IMAGE="${NYDUS_IMAGE}" \ + -e DRAGONFLY_PROXY_URL="http://host.docker.internal:4001" \ + -e DRAGONFLY_SCHEDULER_ENDPOINT="http://host.docker.internal:8002" \ + -e READ_PARALLELISM=8 \ + -e MAX_FILES=200 \ + -e MOUNT_READY_TIMEOUT=120 \ + -v /tmp/perftest-results:/results \ + "${PERFTEST_IMAGE}" + + - name: Show result.json + if: always() + run: | + if [ -f /tmp/perftest-results/result.json ]; then + echo "=== result.json ===" + cat /tmp/perftest-results/result.json + else + echo "ERROR: result.json was not produced." + ls -la /tmp/perftest-results || true + exit 1 + fi + + - name: Assert benchmark succeeded + run: | + # Require: workload exited 0, at least one file was read, and bytes > 0. 
+ jq -e ' + (.workload_rc // 1) == 0 + and (.workload.files_read // 0) > 0 + and (.workload.bytes_read // 0) > 0 + ' /tmp/perftest-results/result.json + + - name: Upload result.json + if: always() + uses: actions/upload-artifact@v6 + with: + name: perftest-result + path: /tmp/perftest-results/ + + - name: Dump service logs + if: always() + continue-on-error: true + run: | + mkdir -p /tmp/perftest-logs + sudo cp /tmp/dragonfly/logs/*.log /tmp/perftest-logs/ 2>/dev/null || true + sudo cp -r /var/log/dragonfly/dfdaemon/ /tmp/perftest-logs/dfdaemon/ 2>/dev/null || true + sudo cp -r /var/log/dragonfly/scheduler/ /tmp/perftest-logs/scheduler/ 2>/dev/null || true + sudo cp -r /var/log/dragonfly/manager/ /tmp/perftest-logs/manager/ 2>/dev/null || true + docker logs mysql > /tmp/perftest-logs/mysql.log 2>&1 || true + docker logs redis > /tmp/perftest-logs/redis.log 2>&1 || true + sudo chmod -R a+r /tmp/perftest-logs || true + + - name: Upload service logs + if: always() + uses: actions/upload-artifact@v6 + with: + name: perftest-service-logs + path: /tmp/perftest-logs/ + + - name: Cleanup + if: always() + continue-on-error: true + run: | + docker rm -f nydus-perftest-run mysql redis 2>/dev/null || true + for proc in dfdaemon scheduler manager; do + if pid=$(pgrep -n "${proc}" 2>/dev/null); then + sudo kill "${pid}" 2>/dev/null || true + fi + done diff --git a/Makefile b/Makefile index 6953b657b19..e3dede36826 100644 --- a/Makefile +++ b/Makefile @@ -209,3 +209,11 @@ nydus-overlayfs-lint: docker-static: docker build -t nydus-rs-static --build-arg RUST_TARGET=${RUST_TARGET_STATIC} misc/musl-static docker run --rm ${CARGO_BUILD_GEARS} -e RUST_TARGET=${RUST_TARGET_STATIC} --workdir /nydus-rs -v ${current_dir}:/nydus-rs nydus-rs-static + +# Build the perf-test image (Dragonfly proxy SDK mode). See misc/perftest/README.md. 
+PERFTEST_IMAGE ?= nydus-perftest:latest +perftest-image: + docker build -f misc/perftest/Dockerfile \ + --build-arg RUST_TARGET=${RUST_TARGET_STATIC} \ + -t ${PERFTEST_IMAGE} ${current_dir} +.PHONY: perftest-image diff --git a/misc/perftest/.dockerignore b/misc/perftest/.dockerignore new file mode 100644 index 00000000000..5fa644c21d0 --- /dev/null +++ b/misc/perftest/.dockerignore @@ -0,0 +1,5 @@ +target/ +**/target/ +.git/ +*.profraw +coverage/ diff --git a/misc/perftest/Dockerfile b/misc/perftest/Dockerfile new file mode 100644 index 00000000000..2db21d058f2 --- /dev/null +++ b/misc/perftest/Dockerfile @@ -0,0 +1,112 @@ +# syntax=docker/dockerfile:1.6 +# +# Nydus performance test image for Dragonfly proxy SDK mode. +# +# This image bundles a statically-linked nydusd (musl) plus a small benchmark +# harness that mounts a Nydus image via FUSE and reads every regular file +# in parallel, exercising the Dragonfly SDK proxy path. Dragonfly (dfdaemon +# + scheduler) is expected to be deployed OUTSIDE the container; the proxy +# and scheduler endpoints are passed via environment variables. +# +# See README.md in this directory for usage. + +# ----------------------------------------------------------------------------- +# Stage 1: build a static nydusd (and nydusctl) on musl. +# +# cargo is invoked directly (not `make static-release`) with --features=virtiofs. +# The `backend-dragonfly-proxy` feature is target-gated for x86_64/aarch64 in +# the root Cargo.toml, so it is automatically enabled on supported arches. +# ----------------------------------------------------------------------------- +ARG RUST_TARGET=x86_64-unknown-linux-musl +FROM clux/muslrust:1.84.0-stable AS builder + +ARG RUST_TARGET +ENV RUST_TARGET=${RUST_TARGET} + +# protoc is required by tonic / dragonfly-api build scripts. +RUN apt-get update \ + && apt-get install -y --no-install-recommends cmake protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /nydus-rs +COPY . 
/nydus-rs + +RUN rustup target add "${RUST_TARGET}" + +# Lean build: directly invoke cargo for the nydus binaries we ship, skipping +# fmt/clippy and contrib targets that `make static-release` would also run. +# The `backend-dragonfly-proxy` storage feature is target-gated for x86_64 / +# aarch64 in the workspace root Cargo.toml, so it is automatically enabled on +# the supported architectures. +RUN cargo build --release --target "${RUST_TARGET}" \ + --bin nydusd --bin nydusctl --features=virtiofs \ + && cp target/${RUST_TARGET}/release/nydusd /out-nydusd \ + && cp target/${RUST_TARGET}/release/nydusctl /out-nydusctl \ + && /out-nydusd --version + +# ----------------------------------------------------------------------------- +# Stage 2: Go builder. Produces a static `crane` for portable image manifest / +# blob access AND the static `workload` benchmark binary used by the harness. +# ----------------------------------------------------------------------------- +FROM golang:1.22-alpine AS go-builder +RUN apk add --no-cache git ca-certificates \ + && go install github.com/google/go-containerregistry/cmd/crane@v0.20.2 \ + && cp /go/bin/crane /out-crane + +WORKDIR /workload +COPY misc/perftest/workload/ /workload/ +RUN CGO_ENABLED=0 go build -trimpath -ldflags '-s -w' -o /out-workload ./... + +# ----------------------------------------------------------------------------- +# Stage 3: minimal runtime image. +# +# Alpine is fine here because nydusd/nydusctl are fully static (musl) and the +# workload binary is a pure-Go static build. The only runtime tooling we need +# beyond bash is fuse3 (for the FUSE mount), jq (for JSON munging), gettext +# (envsubst), curl + tar + ca-certificates (used by crane via TLS). 
+# ----------------------------------------------------------------------------- +FROM alpine:3.20 + +RUN apk add --no-cache \ + bash \ + ca-certificates \ + coreutils \ + curl \ + fuse3 \ + gettext \ + jq \ + tar \ + tini \ + && mkdir -p /etc/nydus /mnt/nydus /results /var/lib/nydus/cache /tmp/nydus + +COPY --from=builder /out-nydusd /usr/local/bin/nydusd +COPY --from=builder /out-nydusctl /usr/local/bin/nydusctl +COPY --from=go-builder /out-crane /usr/local/bin/crane +COPY --from=go-builder /out-workload /usr/local/bin/workload + +COPY misc/perftest/entrypoint.sh /usr/local/bin/perftest-entrypoint +COPY misc/perftest/fetch-bootstrap.sh /usr/local/bin/fetch-bootstrap +COPY misc/perftest/config.template.json /etc/nydus/config.template.json + +RUN chmod +x /usr/local/bin/perftest-entrypoint \ + /usr/local/bin/fetch-bootstrap + +# Sensible defaults; all are overridable at `docker run` time. +ENV NYDUS_IMAGE="ghcr.io/dragonflyoss/image-service/nginx:nydus-latest" \ + DRAGONFLY_PROXY_URL="http://host.docker.internal:4001" \ + DRAGONFLY_SCHEDULER_ENDPOINT="http://host.docker.internal:8002" \ + REGISTRY_SCHEME="https" \ + MOUNT_POINT="/mnt/nydus" \ + RESULTS_DIR="/results" \ + WORK_DIR="/tmp/nydus" \ + BLOB_CACHE_DIR="/var/lib/nydus/cache" \ + READ_PARALLELISM="16" \ + READ_CHUNK_SIZE="1048576" \ + MAX_FILES="0" \ + MOUNT_READY_TIMEOUT="60" \ + NYDUSD_LOG_LEVEL="info" \ + PLATFORM="linux/amd64" + +VOLUME ["/results"] +ENTRYPOINT ["/sbin/tini", "--", "/usr/local/bin/perftest-entrypoint"] +CMD [] diff --git a/misc/perftest/README.md b/misc/perftest/README.md new file mode 100644 index 00000000000..ecfa12b5e42 --- /dev/null +++ b/misc/perftest/README.md @@ -0,0 +1,178 @@ +# nydus perftest image (Dragonfly proxy SDK mode) + +A self-contained container image that mounts a Nydus image via FUSE and +benchmarks cold-cache parallel reads through **Dragonfly proxy SDK mode**. + +Dragonfly itself (dfdaemon + scheduler) is expected to run **outside** this +container. 
The proxy and scheduler endpoints are passed in via environment +variables; the nydusd binary is built statically (musl) so the image runs on +any Linux host without glibc / kernel-userland coupling. + +## What it measures + +The benchmark performs a single cold-cache pass: + +1. nydusd is started with the supplied config and a freshly-fetched bootstrap. +2. The harness waits until FUSE is mounted *and* `nydusctl info` reports + `state == RUNNING`. The time since nydusd was launched is recorded as `timing_sec.mount_ready`. +3. `READ_PARALLELISM` concurrent readers read every regular file under the mountpoint, + each in `READ_CHUNK_SIZE`-byte `read()` calls. The harness records: + - `files_read`, `bytes_read`, `wall_clock_sec` + - `throughput_mbps = bytes_read / wall_clock` (megabytes per second, not megabits) + - per-file read `latency_ms` (mean, p50, p90, p95, p99) +4. `nydusctl info` and `nydusctl metrics` (backend, blobcache, fs) are scraped + at the end and embedded in the JSON summary so you can confirm requests + actually went through the SDK proxy path. + +> **Cold-cache caveat.** Each container run starts with an empty local +> blobcache (`BLOB_CACHE_DIR`, default `/var/lib/nydus/cache`). However, the +> external dfdaemon's own cache persists across runs. For a true cold pass: +> either flush the dfdaemon cache between runs, or use a unique image per run. + +## Building + +From the repo root: + +```bash +make perftest-image # builds nydus-perftest:latest +# or directly: +docker build -f misc/perftest/Dockerfile -t nydus-perftest:latest . +``` + +The Dockerfile has three stages: + +1. A musl Rust builder (`clux/muslrust`) that runs + `cargo build --release --target $RUST_TARGET --bin nydusd --bin nydusctl --features=virtiofs`. + The `backend-dragonfly-proxy` storage feature is target-gated for x86_64 / + aarch64 in the workspace `Cargo.toml`, so it is automatically enabled on + supported arches. `protoc` + `cmake` are pre-installed for the tonic / + dragonfly-api build scripts. +2. 
A Go builder (`golang:1.22-alpine`) that produces a static `crane` binary + (for image manifest / bootstrap resolution) and a static `workload` binary + built from `misc/perftest/workload/`. +3. An alpine runtime containing only `nydusd`, `nydusctl`, `crane`, + `workload`, plus `bash`, `fuse3`, `jq`, `gettext`, `tar`, and `tini`. + +For arm64 hosts, set `--build-arg RUST_TARGET=aarch64-unknown-linux-musl` +(or `make perftest-image RUST_TARGET_STATIC=aarch64-unknown-linux-musl`). + +## Running + +Minimum invocation against an external Dragonfly (dfdaemon listening on the +host at `:4001`, scheduler at `:8002`): + +```bash +docker run --rm \ + --cap-add SYS_ADMIN \ + --device /dev/fuse \ + --security-opt apparmor=unconfined \ + --security-opt seccomp=unconfined \ + --add-host host.docker.internal:host-gateway \ + -e NYDUS_IMAGE=ghcr.io/dragonflyoss/image-service/nginx:nydus-latest \ + -e DRAGONFLY_PROXY_URL=http://host.docker.internal:4001 \ + -e DRAGONFLY_SCHEDULER_ENDPOINT=http://host.docker.internal:8002 \ + -v "$PWD/results:/results" \ + nydus-perftest:latest +``` + +If FUSE inside the container fails on your host, fall back to `--privileged` +(rootless Docker / Podman often need this). The `--add-host` flag is only +needed on Linux to make `host.docker.internal` resolve to the host gateway. + +The summary is written to `./results/result.json` and printed to stderr. + +## Configuration + +### Option A: bring your own nydusd config (recommended for real workloads) + +```bash +-v /path/to/nydusd.json:/etc/nydus/user.json:ro \ +-e NYDUSD_CONFIG=/etc/nydus/user.json +``` + +When `NYDUSD_CONFIG` is set and points to an existing file, the harness uses +it verbatim. You are responsible for setting `host`/`repo`/`proxy.url`/ +`proxy.dragonfly_scheduler_endpoint` correctly. You may also pre-supply a +bootstrap file with `-v ...:/path/bootstrap -e BOOTSTRAP_PATH=/path/bootstrap` +to skip the registry fetch. 
+ +### Option B: render config from template + +When `NYDUSD_CONFIG` is unset, `config.template.json` is rendered with these +env vars (defaults shown): + +| Variable | Default | Notes | +|--------------------------------|--------------------------------------------------------------|-------| +| `NYDUS_IMAGE` | `ghcr.io/dragonflyoss/image-service/nginx:nydus-latest` | Full image ref. Parsed into REGISTRY_HOST/REPO. | +| `REGISTRY_HOST` | (parsed from NYDUS_IMAGE) | Override if the parser guesses wrong. | +| `REGISTRY_REPO` | (parsed from NYDUS_IMAGE) | | +| `REGISTRY_SCHEME` | `https` | | +| `REGISTRY_AUTH` | empty | base64(user:pass) for basic auth. | +| `REGISTRY_SKIP_VERIFY` | `false` | | +| `DRAGONFLY_PROXY_URL` | `http://host.docker.internal:4001` | dfdaemon proxy listen URL. | +| `DRAGONFLY_SCHEDULER_ENDPOINT` | `http://host.docker.internal:8002` | Non-empty value enables SDK mode. | +| `PROXY_FALLBACK` | `true` | Fall back to direct registry if proxy is unhealthy. | +| `BLOB_CACHE_DIR` | `/var/lib/nydus/cache` | nydusd blobcache work_dir. | +| `PREFETCH_ENABLE` | `false` | Background prefetch threads. | +| `PREFETCH_THREADS` | `8` | | + +### Workload knobs + +| Variable | Default | Notes | +|----------------------|---------|-------| +| `READ_PARALLELISM` | `16` | Concurrent file readers. | +| `READ_CHUNK_SIZE` | `1048576` | Bytes per `read()` call. | +| `MAX_FILES` | `0` | Cap files read; 0 = no cap. | +| `MOUNT_READY_TIMEOUT`| `60` | Seconds to wait for FUSE + RUNNING. | +| `NYDUSD_LOG_LEVEL` | `info` | trace/debug/info/warn/error. | +| `PLATFORM` | `linux/amd64` | OCI platform for multi-arch images. | +| `RESULTS_DIR` | `/results` | Where `result.json` is written. | + +### Bootstrap + +The bootstrap (image metadata) is required by nydusd. By default the harness +fetches it from `NYDUS_IMAGE` using `crane` (manifest -> bootstrap layer -> +untar to extract `image.boot`). 
To skip this step, mount a pre-extracted +bootstrap file and set `BOOTSTRAP_PATH=/path/to/bootstrap`. + +## Output + +`$RESULTS_DIR/result.json` has the shape: + +```jsonc +{ + "image": "ghcr.io/.../nginx:nydus-latest", + "platform": "linux/amd64", + "bootstrap_path": "/tmp/nydus/bootstrap", + "config_path": "/tmp/nydus/nydusd.json", + "dragonfly": { "proxy_url": "...", "scheduler_endpoint": "...", "proxy_fallback": true }, + "timing_sec": { "mount_ready": 0.643, "workload": 12.518 }, + "workload": { "files_read": 1213, "bytes_read": 142860288, "throughput_mbps": 11.42, + "latency_ms": { "mean": 18.4, "p50": 9.1, "p90": 41.7, "p95": 63.2, "p99": 121.0 } }, + "workload_rc": 0, + "nydusd": { + "info": { ... }, // nydusctl info + "backend": { ... }, // backend metrics: bytes pulled, request count + "blobcache": { ... }, // cache hit/miss + "fs": { ... } // fs-level counters + } +} +``` + +Inspect `nydusd.backend` to confirm requests actually went through the SDK +proxy path; the request counters there are what tells you the SDK was used. + +## Limitations / known gotchas + +- **External dfdaemon cache is not flushed by this image.** If you want + cold-from-Dragonfly results, drop the dfdaemon cache between runs or + rotate the test image. +- **Bootstrap fetch goes direct (not through the proxy).** This is setup, + not the measured path; the data-blob reads are what's actually being + benchmarked through the SDK. +- **Single cold-pass only.** No warm-cache or baseline modes — keep this + image focused. If you need them, run twice with different cache dirs. +- **FUSE in containers** is fragile: rootless runtimes, locked-down seccomp + profiles, or AppArmor policies can all block the mount. Use + `--privileged` if the documented `--cap-add SYS_ADMIN --device /dev/fuse` + combination doesn't work on your host. 
diff --git a/misc/perftest/config.template.json b/misc/perftest/config.template.json new file mode 100644 index 00000000000..da7a31d02ac --- /dev/null +++ b/misc/perftest/config.template.json @@ -0,0 +1,42 @@ +{ + "device": { + "backend": { + "type": "registry", + "config": { + "scheme": "${REGISTRY_SCHEME}", + "host": "${REGISTRY_HOST}", + "repo": "${REGISTRY_REPO}", + "auth": "${REGISTRY_AUTH}", + "skip_verify": ${REGISTRY_SKIP_VERIFY}, + "timeout": 30, + "connect_timeout": 10, + "retry_limit": 2, + "proxy": { + "url": "${DRAGONFLY_PROXY_URL}", + "ping_url": "${DRAGONFLY_PROXY_URL}", + "fallback": ${PROXY_FALLBACK}, + "check_interval": 5, + "use_http": false, + "dragonfly_scheduler_endpoint": "${DRAGONFLY_SCHEDULER_ENDPOINT}" + } + } + }, + "cache": { + "type": "blobcache", + "compressed": false, + "config": { + "work_dir": "${BLOB_CACHE_DIR}" + } + } + }, + "mode": "direct", + "digest_validate": false, + "iostats_files": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": ${PREFETCH_ENABLE}, + "threads_count": ${PREFETCH_THREADS}, + "merging_size": 131072, + "bandwidth_rate": 0 + } +} diff --git a/misc/perftest/entrypoint.sh b/misc/perftest/entrypoint.sh new file mode 100755 index 00000000000..5e3ce9c74b1 --- /dev/null +++ b/misc/perftest/entrypoint.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# +# Nydus + Dragonfly SDK proxy perf-test orchestrator. +# +# Phases: +# 1. Resolve the nydusd config: +# - If $NYDUSD_CONFIG points to an existing file, use it as-is. +# - Otherwise, render /etc/nydus/config.template.json with envsubst. +# 2. Resolve the bootstrap: +# - If $BOOTSTRAP_PATH is set and exists, use it. +# - Otherwise, fetch from $NYDUS_IMAGE via crane (see fetch-bootstrap). +# 3. Start nydusd in FUSE mode with --apisock for telemetry. +# 4. Wait for FUSE to be mounted AND nydusd to report state RUNNING. +# 5. Run the parallel-read workload over $MOUNT_POINT. +# 6. Scrape nydusd metrics and emit a JSON summary to $RESULTS_DIR/result.json. +# 7. 
Unmount and exit cleanly. +set -euo pipefail + +log() { printf '[perftest] %s\n' "$*" >&2; } +die() { printf '[perftest] ERROR: %s\n' "$*" >&2; exit 1; } + +# ---- Inputs ---------------------------------------------------------------- +NYDUS_IMAGE="${NYDUS_IMAGE:-}" +NYDUSD_CONFIG="${NYDUSD_CONFIG:-}" +BOOTSTRAP_PATH="${BOOTSTRAP_PATH:-}" +MOUNT_POINT="${MOUNT_POINT:-/mnt/nydus}" +RESULTS_DIR="${RESULTS_DIR:-/results}" +WORK_DIR="${WORK_DIR:-/tmp/nydus}" +BLOB_CACHE_DIR="${BLOB_CACHE_DIR:-/var/lib/nydus/cache}" +READ_PARALLELISM="${READ_PARALLELISM:-16}" +READ_CHUNK_SIZE="${READ_CHUNK_SIZE:-1048576}" +MAX_FILES="${MAX_FILES:-0}" +MOUNT_READY_TIMEOUT="${MOUNT_READY_TIMEOUT:-60}" +NYDUSD_LOG_LEVEL="${NYDUSD_LOG_LEVEL:-info}" +PLATFORM="${PLATFORM:-linux/amd64}" + +DRAGONFLY_PROXY_URL="${DRAGONFLY_PROXY_URL:-http://host.docker.internal:4001}" +DRAGONFLY_SCHEDULER_ENDPOINT="${DRAGONFLY_SCHEDULER_ENDPOINT:-http://host.docker.internal:8002}" +REGISTRY_SCHEME="${REGISTRY_SCHEME:-https}" +REGISTRY_AUTH="${REGISTRY_AUTH:-}" +REGISTRY_SKIP_VERIFY="${REGISTRY_SKIP_VERIFY:-false}" +PROXY_FALLBACK="${PROXY_FALLBACK:-true}" +PREFETCH_ENABLE="${PREFETCH_ENABLE:-false}" +PREFETCH_THREADS="${PREFETCH_THREADS:-8}" + +mkdir -p "${WORK_DIR}" "${RESULTS_DIR}" "${BLOB_CACHE_DIR}" "${MOUNT_POINT}" + +APISOCK="${WORK_DIR}/api.sock" +NYDUSD_LOG="${WORK_DIR}/nydusd.log" +RESULT_JSON="${RESULTS_DIR}/result.json" + +# ---- Phase 1: resolve config ---------------------------------------------- +if [ -n "${NYDUSD_CONFIG}" ] && [ -f "${NYDUSD_CONFIG}" ]; then + CONFIG_PATH="${NYDUSD_CONFIG}" + log "Using user-supplied nydusd config: ${CONFIG_PATH}" +else + [ -n "${NYDUS_IMAGE}" ] || die "either NYDUSD_CONFIG or NYDUS_IMAGE must be set" + + # Parse NYDUS_IMAGE into REGISTRY_HOST and REGISTRY_REPO. The first path + # segment is the host iff it contains '.' 
or ':' or equals 'localhost'; + # otherwise we default to docker.io with the 'library/' prefix when only + # a single name segment is present (matching docker's reference parser). A tag is stripped only from the last path segment so that a registry port (host:port/repo) is never mistaken for one. + REF="${NYDUS_IMAGE%@*}" + if [[ "${REF##*/}" == *:* ]]; then REF="${REF%:*}"; fi + if [[ "${REF}" == */* ]]; then + first="${REF%%/*}"; rest="${REF#*/}" + if [[ "${first}" == *.* || "${first}" == *:* || "${first}" == "localhost" ]]; then + REGISTRY_HOST="${REGISTRY_HOST:-${first}}" + REGISTRY_REPO="${REGISTRY_REPO:-${rest}}" + else + REGISTRY_HOST="${REGISTRY_HOST:-docker.io}" + REGISTRY_REPO="${REGISTRY_REPO:-${REF}}" + fi + else + REGISTRY_HOST="${REGISTRY_HOST:-docker.io}" + REGISTRY_REPO="${REGISTRY_REPO:-library/${REF}}" + fi + export REGISTRY_HOST REGISTRY_REPO REGISTRY_SCHEME REGISTRY_AUTH \ + REGISTRY_SKIP_VERIFY PROXY_FALLBACK \ + DRAGONFLY_PROXY_URL DRAGONFLY_SCHEDULER_ENDPOINT \ + BLOB_CACHE_DIR PREFETCH_ENABLE PREFETCH_THREADS + + CONFIG_PATH="${WORK_DIR}/nydusd.json" + envsubst < /etc/nydus/config.template.json > "${CONFIG_PATH}" + log "Rendered config -> ${CONFIG_PATH}" + log " registry: ${REGISTRY_SCHEME}://${REGISTRY_HOST}/${REGISTRY_REPO}" + log " proxy: ${DRAGONFLY_PROXY_URL} scheduler: ${DRAGONFLY_SCHEDULER_ENDPOINT}" +fi + +# ---- Phase 2: resolve bootstrap ------------------------------------------- +if [ -n "${BOOTSTRAP_PATH}" ] && [ -f "${BOOTSTRAP_PATH}" ]; then + log "Using user-supplied bootstrap: ${BOOTSTRAP_PATH}" +else + [ -n "${NYDUS_IMAGE}" ] || die "BOOTSTRAP_PATH not set and NYDUS_IMAGE empty; cannot fetch bootstrap" + log "Fetching bootstrap from ${NYDUS_IMAGE} (platform=${PLATFORM})" + BOOTSTRAP_PATH="$(NYDUS_IMAGE="${NYDUS_IMAGE}" PLATFORM="${PLATFORM}" \ + WORK_DIR="${WORK_DIR}" /usr/local/bin/fetch-bootstrap)" +fi + +# ---- Phase 3: start nydusd ------------------------------------------------- +log "Starting nydusd: bootstrap=${BOOTSTRAP_PATH} mountpoint=${MOUNT_POINT}" +T_DAEMON_START=$(date +%s.%N) + +nydusd \ + --config "${CONFIG_PATH}" \ + --bootstrap "${BOOTSTRAP_PATH}" \ + 
--mountpoint "${MOUNT_POINT}" \ + --apisock "${APISOCK}" \ + --log-level "${NYDUSD_LOG_LEVEL}" \ + > "${NYDUSD_LOG}" 2>&1 & +NYDUSD_PID=$! + +cleanup() { + rc=$? + log "Cleanup (rc=${rc})" + if mountpoint -q "${MOUNT_POINT}" 2>/dev/null; then + umount "${MOUNT_POINT}" 2>/dev/null || umount -l "${MOUNT_POINT}" 2>/dev/null || true + fi + if kill -0 "${NYDUSD_PID}" 2>/dev/null; then + kill "${NYDUSD_PID}" 2>/dev/null || true + wait "${NYDUSD_PID}" 2>/dev/null || true + fi + if [ "${rc}" -ne 0 ] && [ -f "${NYDUSD_LOG}" ]; then + log "--- nydusd.log (tail) ---" + tail -n 80 "${NYDUSD_LOG}" >&2 || true + fi +} +trap cleanup EXIT + +# ---- Phase 4: wait for readiness ------------------------------------------ +log "Waiting up to ${MOUNT_READY_TIMEOUT}s for FUSE mount and daemon RUNNING state..." +T_MOUNT_READY="" +deadline=$(( $(date +%s) + MOUNT_READY_TIMEOUT )) +while [ "$(date +%s)" -lt "${deadline}" ]; do + if ! kill -0 "${NYDUSD_PID}" 2>/dev/null; then + die "nydusd exited prematurely (see ${NYDUSD_LOG})" + fi + if mountpoint -q "${MOUNT_POINT}" && [ -S "${APISOCK}" ]; then + state=$(nydusctl --sock "${APISOCK}" --raw info 2>/dev/null \ + | jq -r '.state // ""' 2>/dev/null || true) + if [ "${state}" = "RUNNING" ] || [ "${state}" = "Running" ]; then + T_MOUNT_READY=$(date +%s.%N); break + fi + fi + sleep 0.2 +done +[ -n "${T_MOUNT_READY}" ] || die "timed out waiting for nydusd to become RUNNING" + +MOUNT_READY_SEC=$(awk -v a="${T_MOUNT_READY}" -v b="${T_DAEMON_START}" 'BEGIN{printf "%.3f", a-b}') +log "Mount ready in ${MOUNT_READY_SEC}s" + +# ---- Phase 5: workload ----------------------------------------------------- +log "Running workload (parallelism=${READ_PARALLELISM}, chunk=${READ_CHUNK_SIZE} bytes, max_files=${MAX_FILES})" +WORKLOAD_OUT="${WORK_DIR}/workload.json" +T_WORKLOAD_START=$(date +%s.%N) +set +e +workload \ + --root "${MOUNT_POINT}" \ + --parallelism "${READ_PARALLELISM}" \ + --chunk-size "${READ_CHUNK_SIZE}" \ + --max-files "${MAX_FILES}" \ + --output 
"${WORKLOAD_OUT}" +WORKLOAD_RC=$? +set -e +T_WORKLOAD_END=$(date +%s.%N) +WORKLOAD_SEC=$(awk -v a="${T_WORKLOAD_END}" -v b="${T_WORKLOAD_START}" 'BEGIN{printf "%.3f", a-b}') +log "Workload finished in ${WORKLOAD_SEC}s (rc=${WORKLOAD_RC})" + +# ---- Phase 6: scrape metrics + emit summary ------------------------------- +# Each scrape must produce valid JSON for jq's --slurpfile to work. +scrape() { + local out + out=$(nydusctl --sock "${APISOCK}" --raw "$@" 2>/dev/null) || out="" + if [ -z "${out}" ] || ! printf '%s' "${out}" | jq -e . >/dev/null 2>&1; then + echo "{}" + else + printf '%s' "${out}" + fi +} +echo "$(scrape info)" > "${WORK_DIR}/info.json" +echo "$(scrape metrics backend)" > "${WORK_DIR}/backend.json" +echo "$(scrape metrics cache)" > "${WORK_DIR}/cache.json" +echo "$(scrape metrics fsstats)" > "${WORK_DIR}/fsstats.json" + +[ -f "${WORKLOAD_OUT}" ] || echo '{}' > "${WORKLOAD_OUT}" + +jq -n \ + --arg image "${NYDUS_IMAGE:-}" \ + --arg platform "${PLATFORM}" \ + --arg config_path "${CONFIG_PATH}" \ + --arg bootstrap_path "${BOOTSTRAP_PATH}" \ + --arg proxy_url "${DRAGONFLY_PROXY_URL}" \ + --arg scheduler "${DRAGONFLY_SCHEDULER_ENDPOINT}" \ + --argjson proxy_fb "$([ "${PROXY_FALLBACK}" = "true" ] && echo true || echo false)" \ + --argjson mount_ready "${MOUNT_READY_SEC}" \ + --argjson workload_sec "${WORKLOAD_SEC}" \ + --argjson workload_rc "${WORKLOAD_RC}" \ + --slurpfile workload "${WORKLOAD_OUT}" \ + --slurpfile info "${WORK_DIR}/info.json" \ + --slurpfile backend "${WORK_DIR}/backend.json" \ + --slurpfile blobcache "${WORK_DIR}/cache.json" \ + --slurpfile fs "${WORK_DIR}/fsstats.json" \ + '{ + image: $image, + platform: $platform, + config_path: $config_path, + bootstrap_path: $bootstrap_path, + dragonfly: { proxy_url: $proxy_url, scheduler_endpoint: $scheduler, proxy_fallback: $proxy_fb }, + timing_sec: { mount_ready: $mount_ready, workload: $workload_sec }, + workload_rc: $workload_rc, + workload: ($workload[0] // {}), + nydusd: { + info: ($info[0] 
// {}), + backend: ($backend[0] // {}), + blobcache: ($blobcache[0] // {}), + fs: ($fs[0] // {}) + } + }' > "${RESULT_JSON}" + +log "Wrote summary to ${RESULT_JSON}" +echo "================ PERF TEST SUMMARY ================" >&2 +jq -r ' + "image : \(.image)", + "mount_ready_sec : \(.timing_sec.mount_ready)", + "workload_sec : \(.timing_sec.workload)", + "files_read : \(.workload.files_read // 0) (skipped=\(.workload.files_skipped // 0), errors=\(.workload.files_errored // 0))", + "bytes_read : \(.workload.bytes_read // 0)", + "throughput_MBps : \(.workload.throughput_mbps // 0)", + "latency_ms p50/p95/p99 : \(.workload.latency_ms.p50 // 0) / \(.workload.latency_ms.p95 // 0) / \(.workload.latency_ms.p99 // 0)", + "workload_rc : \(.workload_rc)" +' "${RESULT_JSON}" >&2 +echo "===================================================" >&2 + +exit "${WORKLOAD_RC}" diff --git a/misc/perftest/fetch-bootstrap.sh b/misc/perftest/fetch-bootstrap.sh new file mode 100755 index 00000000000..6936143178e --- /dev/null +++ b/misc/perftest/fetch-bootstrap.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# +# Resolve and extract the Nydus bootstrap (image.boot) for a Nydus image. +# +# Inputs (env): +# NYDUS_IMAGE full image reference, e.g. ghcr.io/foo/bar:nydus-latest +# PLATFORM OCI platform selector for index manifests (e.g. linux/amd64) +# WORK_DIR scratch directory +# +# Output: +# On success, writes the bootstrap blob to "$WORK_DIR/bootstrap" and prints +# that path to stdout. +# +# Notes: +# The bootstrap is identified by the layer annotation +# containerd.io/snapshot/nydus-bootstrap=true +# and falls back to "the first non-blob nydus layer" if the annotation is +# missing (matches the convention used in the repo's e2e workflow). +# The blob is a tar.gz containing image.boot (or *.boot) which is what +# nydusd actually consumes. 
+set -euo pipefail + +: "${NYDUS_IMAGE:?NYDUS_IMAGE must be set}" +: "${WORK_DIR:?WORK_DIR must be set}" +PLATFORM="${PLATFORM:-linux/amd64}" + +mkdir -p "${WORK_DIR}" +MANIFEST="${WORK_DIR}/manifest.json" +LAYER_TGZ="${WORK_DIR}/bootstrap-layer.tar.gz" +EXTRACT_DIR="${WORK_DIR}/bootstrap-extract" +OUT="${WORK_DIR}/bootstrap" + +echo "[fetch-bootstrap] image=${NYDUS_IMAGE} platform=${PLATFORM}" >&2 + +crane manifest --platform "${PLATFORM}" "${NYDUS_IMAGE}" > "${MANIFEST}" + +# Identify the bootstrap layer. Prefer the explicit annotation, then fall +# back to the first layer whose mediaType mentions "nydus" but not "blob" +# (matches the convention used in this repo's e2e workflow). +BOOTSTRAP_DIGEST=$(jq -r ' + (.layers[]? + | select(.annotations["containerd.io/snapshot/nydus-bootstrap"] == "true") + | .digest) // ( + .layers[]? + | select((.mediaType // "") | (contains("nydus") and (contains("blob") | not))) + | .digest) +' "${MANIFEST}" | head -n1) + +if [ -z "${BOOTSTRAP_DIGEST}" ] || [ "${BOOTSTRAP_DIGEST}" = "null" ]; then + echo "[fetch-bootstrap] ERROR: no bootstrap layer in manifest" >&2 + cat "${MANIFEST}" >&2 + exit 1 +fi + +echo "[fetch-bootstrap] bootstrap layer digest=${BOOTSTRAP_DIGEST}" >&2 +crane blob "${NYDUS_IMAGE}@${BOOTSTRAP_DIGEST}" > "${LAYER_TGZ}" + +rm -rf "${EXTRACT_DIR}" +mkdir -p "${EXTRACT_DIR}" +if ! 
tar -xf "${LAYER_TGZ}" -C "${EXTRACT_DIR}" 2>/dev/null; then + echo "[fetch-bootstrap] ERROR: failed to untar bootstrap layer" >&2 + file "${LAYER_TGZ}" >&2 || true + exit 1 +fi + +BOOTSTRAP_FILE=$(find "${EXTRACT_DIR}" \( -name 'image.boot' -o -name '*.boot' \) -type f | head -1) +if [ -z "${BOOTSTRAP_FILE}" ]; then + echo "[fetch-bootstrap] ERROR: no .boot file in extracted layer" >&2 + find "${EXTRACT_DIR}" >&2 + exit 1 +fi + +cp "${BOOTSTRAP_FILE}" "${OUT}" +echo "[fetch-bootstrap] wrote ${OUT} ($(stat -c%s "${OUT}") bytes)" >&2 +echo "${OUT}" diff --git a/misc/perftest/workload/go.mod b/misc/perftest/workload/go.mod new file mode 100644 index 00000000000..d2111232cf8 --- /dev/null +++ b/misc/perftest/workload/go.mod @@ -0,0 +1,3 @@ +module github.com/dragonflyoss/nydus/misc/perftest/workload + +go 1.22 diff --git a/misc/perftest/workload/main.go b/misc/perftest/workload/main.go new file mode 100644 index 00000000000..5383eefbed2 --- /dev/null +++ b/misc/perftest/workload/main.go @@ -0,0 +1,232 @@ +// workload runs a cold-cache parallel-read benchmark over a mounted Nydus +// filesystem and writes a JSON summary describing throughput and latency. +// +// It is the data-plane half of the nydus perftest image; the orchestrator +// (entrypoint.sh) is responsible for mounting nydusd before invoking this +// program. We deliberately use stdlib only so the binary can be built +// statically with `CGO_ENABLED=0 go build`. 
+package main + +import ( + "encoding/json" + "flag" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "sort" + "sync" + "sync/atomic" + "time" +) + +type latencyStats struct { + Mean float64 `json:"mean"` + P50 float64 `json:"p50"` + P90 float64 `json:"p90"` + P95 float64 `json:"p95"` + P99 float64 `json:"p99"` +} + +type summary struct { + FilesSeen int `json:"files_seen"` + FilesRead int `json:"files_read"` + FilesSkipped int `json:"files_skipped"` + FilesErrored int `json:"files_errored"` + BytesRead int64 `json:"bytes_read"` + WallClockSec float64 `json:"wall_clock_sec"` + ThroughputMBps float64 `json:"throughput_mbps"` + Parallelism int `json:"parallelism"` + ChunkSize int `json:"chunk_size"` + LatencyMs latencyStats `json:"latency_ms"` + ErrorSamples []string `json:"error_samples"` +} + +func main() { + root := flag.String("root", "", "directory to walk and read") + parallelism := flag.Int("parallelism", 16, "concurrent file readers") + chunkSize := flag.Int("chunk-size", 1<<20, "bytes per read() call") + maxFiles := flag.Int("max-files", 0, "cap files read; 0 = no cap") + output := flag.String("output", "", "summary JSON output path") + flag.Parse() + if *root == "" || *output == "" || *chunkSize < 1 { // chunk-size < 1 would panic in make() or spin forever on zero-byte reads in readOne + fmt.Fprintln(os.Stderr, "usage: workload --root DIR --output PATH [--parallelism N] [--chunk-size N>0] [--max-files N]") + os.Exit(2) + } + if *parallelism < 1 { + *parallelism = 1 + } + + files, skipped, err := collectFiles(*root, *maxFiles) + if err != nil { + fmt.Fprintf(os.Stderr, "[workload] walk error: %v\n", err) + os.Exit(1) + } + fmt.Fprintf(os.Stderr, "[workload] %d files to read (%d non-regular skipped)\n", len(files), skipped) + + var ( + bytesRead int64 + errored int64 + latencies = make([]float64, 0, len(files)) + latMu sync.Mutex + errSamples []string + errSamplesMu sync.Mutex + work = make(chan string, len(files)) + wg sync.WaitGroup + ) + for _, p := range files { + work <- p + } + close(work) + + tStart := time.Now() + for w := 0; w < *parallelism; w++ { +
wg.Add(1) + go func() { + defer wg.Done() + localBuf := make([]byte, *chunkSize) + for path := range work { + n, lat, rerr := readOne(path, localBuf) + if rerr != nil { + atomic.AddInt64(&errored, 1) + errSamplesMu.Lock() + if len(errSamples) < 10 { + errSamples = append(errSamples, fmt.Sprintf("%s: %v", path, rerr)) + } + errSamplesMu.Unlock() + continue + } + atomic.AddInt64(&bytesRead, n) + latMu.Lock() + latencies = append(latencies, lat.Seconds()) + latMu.Unlock() + } + }() + } + wg.Wait() + elapsed := time.Since(tStart).Seconds() + + s := summary{ + FilesSeen: len(files) + skipped, + FilesRead: len(files) - int(errored), + FilesSkipped: skipped, + FilesErrored: int(errored), + BytesRead: bytesRead, + WallClockSec: round(elapsed, 6), + Parallelism: *parallelism, + ChunkSize: *chunkSize, + ErrorSamples: errSamples, + } + if elapsed > 0 { + s.ThroughputMBps = round(float64(bytesRead)/1_000_000.0/elapsed, 3) + } + s.LatencyMs = computeLatency(latencies) + + out, err := os.Create(*output) + if err != nil { + fmt.Fprintf(os.Stderr, "[workload] cannot write %s: %v\n", *output, err) + os.Exit(1) + } + enc := json.NewEncoder(out) + enc.SetIndent("", " ") + if err := enc.Encode(&s); err != nil { + fmt.Fprintf(os.Stderr, "[workload] encode error: %v\n", err) + os.Exit(1) + } + out.Close() + + fmt.Fprintf(os.Stderr, + "[workload] done: %d files, %d bytes, %.2f MB/s, p95=%.2fms\n", + s.FilesRead, s.BytesRead, s.ThroughputMBps, s.LatencyMs.P95) + + if errored > 0 && s.FilesRead == 0 { + os.Exit(1) + } +} + +func collectFiles(root string, maxFiles int) ([]string, int, error) { + var files []string + skipped := 0 + err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { + if err != nil { + skipped++ + return nil + } + if d.IsDir() { + return nil + } + if !d.Type().IsRegular() { + skipped++ + return nil + } + files = append(files, path) + if maxFiles > 0 && len(files) >= maxFiles { + return filepath.SkipAll + } + return nil + }) + return files, 
skipped, err +} + +func readOne(path string, buf []byte) (int64, time.Duration, error) { + t0 := time.Now() + f, err := os.Open(path) + if err != nil { + return 0, time.Since(t0), err + } + defer f.Close() + var total int64 + for { + n, rerr := f.Read(buf) + total += int64(n) + if rerr == io.EOF { + break + } + if rerr != nil { + return total, time.Since(t0), rerr + } + } + return total, time.Since(t0), nil +} + +func computeLatency(secs []float64) latencyStats { + if len(secs) == 0 { + return latencyStats{} + } + sorted := make([]float64, len(secs)) + copy(sorted, secs) + sort.Float64s(sorted) + var sum float64 + for _, v := range secs { + sum += v + } + pick := func(p float64) float64 { + idx := int(p * float64(len(sorted)-1)) + if idx < 0 { + idx = 0 + } + if idx >= len(sorted) { + idx = len(sorted) - 1 + } + return sorted[idx] * 1000.0 + } + return latencyStats{ + Mean: round(sum/float64(len(secs))*1000.0, 3), + P50: round(pick(0.50), 3), + P90: round(pick(0.90), 3), + P95: round(pick(0.95), 3), + P99: round(pick(0.99), 3), + } +} + +func round(v float64, places int) float64 { + scale := 1.0 + for i := 0; i < places; i++ { + scale *= 10 + } + if v >= 0 { + return float64(int64(v*scale+0.5)) / scale + } + return float64(int64(v*scale-0.5)) / scale +} From af98d5e25a506cc5311595082575247f068730bb Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 22 Apr 2026 07:35:22 +0000 Subject: [PATCH 02/13] ci/perftest: push perftest image to GHCR and skip rebuild when tag exists Replace local artifact upload/download with direct push/pull to GHCR. Compute a content-addressed tag from the commit SHA and lowercase repo owner, then push the image once and reuse it across jobs. Add a manifest check so the build step is skipped if the tag already exists, saving time on re-runs or multiple workflows on the same commit. Update permissions and login steps so both build and benchmark jobs can read and write packages. 
Signed-off-by: Peng Tao --- .github/workflows/perftest.yml | 76 ++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/.github/workflows/perftest.yml b/.github/workflows/perftest.yml index 65967ae4799..99ee4a443d3 100644 --- a/.github/workflows/perftest.yml +++ b/.github/workflows/perftest.yml @@ -16,25 +16,61 @@ on: permissions: contents: read + packages: write env: DRAGONFLY_VERSION: "2.4.3" CLIENT_VERSION: "1.3.3" - PERFTEST_IMAGE: "nydus-perftest:ci" + # PERFTEST_IMAGE is computed per-job from the repository owner (lower-cased) + # and the commit SHA so each commit gets a unique, content-addressed tag. jobs: build-image: name: Build perftest image runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} timeout-minutes: 60 + permissions: + contents: read + packages: write + outputs: + image: ${{ steps.meta.outputs.image }} steps: - name: Checkout code uses: actions/checkout@v6 + - name: Compute image reference + id: meta + run: | + # GHCR requires lowercase repository names. + owner_lc="${GITHUB_REPOSITORY_OWNER,,}" + image="ghcr.io/${owner_lc}/nydus-perftest:${GITHUB_SHA}" + echo "image=${image}" >> "$GITHUB_OUTPUT" + echo "Resolved perftest image: ${image}" + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Check whether image tag already exists + id: check + run: | + if docker manifest inspect "${{ steps.meta.outputs.image }}" > /dev/null 2>&1; then + echo "exists=true" >> "$GITHUB_OUTPUT" + echo "Image ${{ steps.meta.outputs.image }} already exists in GHCR; skipping build." + else + echo "exists=false" >> "$GITHUB_OUTPUT" + echo "Image ${{ steps.meta.outputs.image }} not found; will build and push." 
+ fi + - name: Set up Docker Buildx + if: steps.check.outputs.exists != 'true' uses: docker/setup-buildx-action@v3 - name: Cache cargo registry / git via Buildx + if: steps.check.outputs.exists != 'true' uses: actions/cache@v4 with: path: | @@ -43,20 +79,24 @@ jobs: restore-keys: | buildx-perftest- - - name: Build perftest image + - name: Build and push perftest image + if: steps.check.outputs.exists != 'true' uses: docker/build-push-action@v6 with: context: . file: misc/perftest/Dockerfile - tags: ${{ env.PERFTEST_IMAGE }} - load: true + tags: ${{ steps.meta.outputs.image }} + push: true build-args: | RUST_TARGET=x86_64-unknown-linux-musl cache-from: type=local,src=~/.cache/buildx-perftest cache-to: type=local,dest=~/.cache/buildx-perftest,mode=max - name: Verify image bundles required binaries + env: + PERFTEST_IMAGE: ${{ steps.meta.outputs.image }} run: | + docker pull "${PERFTEST_IMAGE}" docker run --rm --entrypoint /usr/local/bin/nydusd ${PERFTEST_IMAGE} --version docker run --rm --entrypoint /usr/local/bin/nydusctl ${PERFTEST_IMAGE} --help | head -5 docker run --rm --entrypoint /usr/local/bin/crane ${PERFTEST_IMAGE} version @@ -69,17 +109,6 @@ jobs: ldd "$b" 2>&1 || true done' - - name: Save image - run: | - docker save "${PERFTEST_IMAGE}" -o /tmp/perftest-image.tar - - - name: Upload image artifact - uses: actions/upload-artifact@v6 - with: - name: perftest-image - path: /tmp/perftest-image.tar - retention-days: 1 - dragonfly-download: name: Download Dragonfly binaries runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} @@ -121,21 +150,26 @@ jobs: runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} needs: [build-image, dragonfly-download] timeout-minutes: 30 + permissions: + contents: read + packages: read env: NYDUS_IMAGE: ${{ github.event.inputs.nydus_image || 'ghcr.io/dragonflyoss/image-service/nginx:nydus-latest' }} + PERFTEST_IMAGE: ${{ needs.build-image.outputs.image }} steps: - name: Checkout code uses: actions/checkout@v6 - - name: Download perftest 
image - uses: actions/download-artifact@v7 + - name: Log in to GHCR + uses: docker/login-action@v3 with: - name: perftest-image - path: /tmp + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - - name: Load perftest image + - name: Pull perftest image run: | - docker load -i /tmp/perftest-image.tar + docker pull "${PERFTEST_IMAGE}" docker images | grep nydus-perftest - name: Download Dragonfly artifacts From a9d77ed5e1fc99ca56f47d1b137167b192bfe561 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 22 Apr 2026 10:29:07 +0000 Subject: [PATCH 03/13] ci: add private registry auth support for perftest Enable performance tests to pull nydus images from private registries by injecting registry credentials via REGISTRY_AUTH secret. The workflow now forwards the base64-encoded credentials to the perftest container, and the entrypoint script creates a docker config.json for crane authentication. Security options are also added to allow unconfined apparmor/seccomp for full container privileges during testing. Signed-off-by: Peng Tao --- .github/workflows/perftest.yml | 7 +++++++ misc/perftest/entrypoint.sh | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/.github/workflows/perftest.yml b/.github/workflows/perftest.yml index 99ee4a443d3..0a6917288b3 100644 --- a/.github/workflows/perftest.yml +++ b/.github/workflows/perftest.yml @@ -156,6 +156,10 @@ jobs: env: NYDUS_IMAGE: ${{ github.event.inputs.nydus_image || 'ghcr.io/dragonflyoss/image-service/nginx:nydus-latest' }} PERFTEST_IMAGE: ${{ needs.build-image.outputs.image }} + # Optional. Base64("user:password") string forwarded into the perftest + # container so both `crane` (bootstrap fetch) and the nydusd registry + # backend can authenticate against private NYDUS_IMAGE registries. 
+ REGISTRY_AUTH: ${{ secrets.PERFTEST_REGISTRY_AUTH }} steps: - name: Checkout code uses: actions/checkout@v6 @@ -273,7 +277,10 @@ jobs: --add-host host.docker.internal:host-gateway \ --privileged \ --device /dev/fuse \ + --security-opt apparmor=unconfined \ + --security-opt seccomp=unconfined \ -e NYDUS_IMAGE="${NYDUS_IMAGE}" \ + -e REGISTRY_AUTH="${REGISTRY_AUTH:-}" \ -e DRAGONFLY_PROXY_URL="http://host.docker.internal:4001" \ -e DRAGONFLY_SCHEDULER_ENDPOINT="http://host.docker.internal:8002" \ -e READ_PARALLELISM=8 \ diff --git a/misc/perftest/entrypoint.sh b/misc/perftest/entrypoint.sh index 5e3ce9c74b1..9a1390413d2 100755 --- a/misc/perftest/entrypoint.sh +++ b/misc/perftest/entrypoint.sh @@ -45,6 +45,31 @@ PREFETCH_THREADS="${PREFETCH_THREADS:-8}" mkdir -p "${WORK_DIR}" "${RESULTS_DIR}" "${BLOB_CACHE_DIR}" "${MOUNT_POINT}" +# If REGISTRY_AUTH is provided (base64 of "user:password"), materialise a +# docker config.json so `crane` (used by fetch-bootstrap) can authenticate +# against private registries. The nydusd registry backend already picks up +# REGISTRY_AUTH via the rendered config below. 
+if [ -n "${REGISTRY_AUTH}" ]; then + auth_host="${REGISTRY_HOST:-}" + if [ -z "${auth_host}" ] && [ -n "${NYDUS_IMAGE}" ]; then + ref="${NYDUS_IMAGE%@*}"; if [[ "${ref##*/}" == *:* ]]; then ref="${ref%:*}"; fi # strip a tag only when the last path component has one, so "host:5000/repo" keeps its port + if [[ "${ref}" == */* ]]; then + first="${ref%%/*}" + if [[ "${first}" == *.* || "${first}" == *:* || "${first}" == "localhost" ]]; then + auth_host="${first}" + fi + fi + auth_host="${auth_host:-docker.io}" + fi + export DOCKER_CONFIG="${WORK_DIR}/.docker" + mkdir -p "${DOCKER_CONFIG}" + jq -n --arg host "${auth_host}" --arg auth "${REGISTRY_AUTH}" \ + '{auths: {($host): {auth: $auth}}}' \ + > "${DOCKER_CONFIG}/config.json" + chmod 600 "${DOCKER_CONFIG}/config.json" + log "Wrote registry credentials for ${auth_host} to ${DOCKER_CONFIG}/config.json" +fi + APISOCK="${WORK_DIR}/api.sock" NYDUSD_LOG="${WORK_DIR}/nydusd.log" RESULT_JSON="${RESULTS_DIR}/result.json" From dde42162fe523d6c52af9a1d331802ae2fe78d84 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 28 Apr 2026 09:03:05 +0000 Subject: [PATCH 04/13] perftest: enable local nydusd binary testing Switch runtime base from Alpine to Ubuntu so a host-built nydusd can be bind-mounted over /usr/local/bin/nydusd without musl/glibc conflicts. Update entrypoint to validate and log the selected binary and version, recording both in result.json and the printed summary. Document the bind-mount workflow in README.md so developers can test daemon changes without rebuilding the perftest image.
Signed-off-by: Peng Tao --- misc/perftest/Dockerfile | 31 ++++++++++++++++++++----------- misc/perftest/README.md | 35 +++++++++++++++++++++++++++++++---- misc/perftest/entrypoint.sh | 21 ++++++++++++++++++--- 3 files changed, 69 insertions(+), 18 deletions(-) diff --git a/misc/perftest/Dockerfile b/misc/perftest/Dockerfile index 2db21d058f2..c8c9292ef40 100644 --- a/misc/perftest/Dockerfile +++ b/misc/perftest/Dockerfile @@ -4,9 +4,11 @@ # # This image bundles a statically-linked nydusd (musl) plus a small benchmark # harness that mounts a Nydus image via FUSE and reads every regular file -# in parallel, exercising the Dragonfly SDK proxy path. Dragonfly (dfdaemon -# + scheduler) is expected to be deployed OUTSIDE the container; the proxy -# and scheduler endpoints are passed via environment variables. +# in parallel, exercising the Dragonfly SDK proxy path. Bind-mount a local +# nydusd over /usr/local/bin/nydusd to test a new daemon without rebuilding +# this image. Dragonfly (dfdaemon + scheduler) is expected to be deployed +# OUTSIDE the container; the proxy and scheduler endpoints are passed via +# environment variables. # # See README.md in this directory for usage. @@ -60,23 +62,30 @@ RUN CGO_ENABLED=0 go build -trimpath -ldflags '-s -w' -o /out-workload ./... # ----------------------------------------------------------------------------- # Stage 3: minimal runtime image. # -# Alpine is fine here because nydusd/nydusctl are fully static (musl) and the -# workload binary is a pure-Go static build. The only runtime tooling we need -# beyond bash is fuse3 (for the FUSE mount), jq (for JSON munging), gettext -# (envsubst), curl + tar + ca-certificates (used by crane via TLS). +# Ubuntu keeps the bundled static musl binaries working while also allowing +# /usr/local/bin/nydusd to be replaced by a normal glibc-linked nydusd +# bind-mounted from a local host build. 
The only runtime tooling we need beyond +# bash is fuse3 (for the FUSE mount), jq (for JSON munging), gettext-base +# (envsubst), curl + tar + ca-certificates (used by crane via TLS), and tini. # ----------------------------------------------------------------------------- -FROM alpine:3.20 +FROM ubuntu:24.04 -RUN apk add --no-cache \ +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ bash \ ca-certificates \ coreutils \ curl \ + file \ fuse3 \ - gettext \ + gettext-base \ jq \ + libgcc-s1 \ tar \ tini \ + util-linux \ + zlib1g \ + && rm -rf /var/lib/apt/lists/* \ && mkdir -p /etc/nydus /mnt/nydus /results /var/lib/nydus/cache /tmp/nydus COPY --from=builder /out-nydusd /usr/local/bin/nydusd @@ -108,5 +117,5 @@ ENV NYDUS_IMAGE="ghcr.io/dragonflyoss/image-service/nginx:nydus-latest" \ PLATFORM="linux/amd64" VOLUME ["/results"] -ENTRYPOINT ["/sbin/tini", "--", "/usr/local/bin/perftest-entrypoint"] +ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/perftest-entrypoint"] CMD [] diff --git a/misc/perftest/README.md b/misc/perftest/README.md index ecfa12b5e42..7eb9d8463d9 100644 --- a/misc/perftest/README.md +++ b/misc/perftest/README.md @@ -5,8 +5,9 @@ benchmarks cold-cache parallel reads through **Dragonfly proxy SDK mode**. Dragonfly itself (dfdaemon + scheduler) is expected to run **outside** this container. The proxy and scheduler endpoints are passed in via environment -variables; the nydusd binary is built statically (musl) so the image runs on -any Linux host without glibc / kernel-userland coupling. +variables. The image bundles a static nydusd by default; bind-mount a locally +built nydusd over `/usr/local/bin/nydusd` when comparing daemon changes without +rebuilding the image. ## What it measures @@ -50,8 +51,9 @@ The Dockerfile has three stages: 2. 
A Go builder (`golang:1.22-alpine`) that produces a static `crane` binary (for image manifest / bootstrap resolution) and a static `workload` binary built from `misc/perftest/workload/`. -3. An alpine runtime containing only `nydusd`, `nydusctl`, `crane`, - `workload`, plus `bash`, `fuse3`, `jq`, `gettext`, `tar`, and `tini`. +3. An Ubuntu runtime containing `nydusd`, `nydusctl`, `crane`, `workload`, + plus `bash`, `fuse3`, `jq`, `gettext-base`, `tar`, `tini`, and the + libraries needed by normal glibc-linked local nydusd builds. For arm64 hosts, set `--build-arg RUST_TARGET=aarch64-unknown-linux-musl` (or `make perftest-image RUST_TARGET_STATIC=aarch64-unknown-linux-musl`). @@ -83,6 +85,29 @@ The summary is written to `./results/result.json` and printed to stderr. ## Configuration +### Using a locally built nydusd + +By default the harness runs the bundled `/usr/local/bin/nydusd`. To test a +locally built daemon without rebuilding the perftest image, bind-mount it over +that path: + +```bash +docker run --rm \ + --privileged \ + --device /dev/fuse \ + --security-opt apparmor=unconfined \ + --security-opt seccomp=unconfined \ + --add-host host.docker.internal:host-gateway \ + -e NYDUS_IMAGE=ghcr.io/dragonflyoss/image-service/nginx:nydus-latest \ + -v "$PWD/target/release/nydusd:/usr/local/bin/nydusd:ro" \ + -v "$PWD/results:/results" \ + nydus-perftest:latest +``` + +The entrypoint validates `/usr/local/bin/nydusd` before fetching the bootstrap, +logs `nydusd --version`, and records the selected binary and version in +`result.json` and the printed summary. + ### Option A: bring your own nydusd config (recommended for real workloads) ```bash @@ -151,6 +176,8 @@ bootstrap file and set `BOOTSTRAP_PATH=/path/to/bootstrap`. "latency_ms": { "mean": 18.4, "p50": 9.1, "p90": 41.7, "p95": 63.2, "p99": 121.0 } }, "workload_rc": 0, "nydusd": { + "binary": "/usr/local/bin/nydusd", + "version": "Version: ...", "info": { ... }, // nydusctl info "backend": { ... 
}, // backend metrics: bytes pulled, request count "blobcache": { ... }, // cache hit/miss diff --git a/misc/perftest/entrypoint.sh b/misc/perftest/entrypoint.sh index 9a1390413d2..b2fa9995990 100755 --- a/misc/perftest/entrypoint.sh +++ b/misc/perftest/entrypoint.sh @@ -9,7 +9,7 @@ # 2. Resolve the bootstrap: # - If $BOOTSTRAP_PATH is set and exists, use it. # - Otherwise, fetch from $NYDUS_IMAGE via crane (see fetch-bootstrap). -# 3. Start nydusd in FUSE mode with --apisock for telemetry. +# 3. Start /usr/local/bin/nydusd in FUSE mode with --apisock for telemetry. # 4. Wait for FUSE to be mounted AND nydusd to report state RUNNING. # 5. Run the parallel-read workload over $MOUNT_POINT. # 6. Scrape nydusd metrics and emit a JSON summary to $RESULTS_DIR/result.json. @@ -42,9 +42,18 @@ REGISTRY_SKIP_VERIFY="${REGISTRY_SKIP_VERIFY:-false}" PROXY_FALLBACK="${PROXY_FALLBACK:-true}" PREFETCH_ENABLE="${PREFETCH_ENABLE:-false}" PREFETCH_THREADS="${PREFETCH_THREADS:-8}" +NYDUSD="/usr/local/bin/nydusd" mkdir -p "${WORK_DIR}" "${RESULTS_DIR}" "${BLOB_CACHE_DIR}" "${MOUNT_POINT}" +if [ ! -x "${NYDUSD}" ]; then + die "nydusd is not executable: ${NYDUSD}" +fi +if ! NYDUSD_VERSION="$("${NYDUSD}" --version 2>&1 | tr '\n' ' ')"; then + die "failed to execute ${NYDUSD}: ${NYDUSD_VERSION}" +fi +log "Using nydusd binary: ${NYDUSD} (${NYDUSD_VERSION})" + # If REGISTRY_AUTH is provided (base64 of "user:password"), materialise a # docker config.json so `crane` (used by fetch-bootstrap) can authenticate # against private registries. 
The nydusd registry backend already picks up @@ -123,10 +132,10 @@ else fi # ---- Phase 3: start nydusd ------------------------------------------------- -log "Starting nydusd: bootstrap=${BOOTSTRAP_PATH} mountpoint=${MOUNT_POINT}" +log "Starting nydusd: binary=${NYDUSD} bootstrap=${BOOTSTRAP_PATH} mountpoint=${MOUNT_POINT}" T_DAEMON_START=$(date +%s.%N) -nydusd \ +"${NYDUSD}" \ --config "${CONFIG_PATH}" \ --bootstrap "${BOOTSTRAP_PATH}" \ --mountpoint "${MOUNT_POINT}" \ @@ -214,6 +223,8 @@ jq -n \ --arg platform "${PLATFORM}" \ --arg config_path "${CONFIG_PATH}" \ --arg bootstrap_path "${BOOTSTRAP_PATH}" \ + --arg nydusd_bin "${NYDUSD}" \ + --arg nydusd_version "${NYDUSD_VERSION}" \ --arg proxy_url "${DRAGONFLY_PROXY_URL}" \ --arg scheduler "${DRAGONFLY_SCHEDULER_ENDPOINT}" \ --argjson proxy_fb "$([ "${PROXY_FALLBACK}" = "true" ] && echo true || echo false)" \ @@ -235,6 +246,8 @@ jq -n \ workload_rc: $workload_rc, workload: ($workload[0] // {}), nydusd: { + binary: $nydusd_bin, + version: $nydusd_version, info: ($info[0] // {}), backend: ($backend[0] // {}), blobcache: ($blobcache[0] // {}), @@ -246,6 +259,8 @@ log "Wrote summary to ${RESULT_JSON}" echo "================ PERF TEST SUMMARY ================" >&2 jq -r ' "image : \(.image)", + "nydusd_binary : \(.nydusd.binary)", + "nydusd_version : \(.nydusd.version)", "mount_ready_sec : \(.timing_sec.mount_ready)", "workload_sec : \(.timing_sec.workload)", "files_read : \(.workload.files_read // 0) (skipped=\(.workload.files_skipped // 0), errors=\(.workload.files_errored // 0))", From 36736e0fb1adf4e7c03f26709942c98e296e7dee Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 30 Apr 2026 02:08:19 +0000 Subject: [PATCH 05/13] perftest: add Dragonfly streaming prefetch support and Kubernetes pod manifest Add environment variables and config template support for Dragonfly streaming prefetch feature, including thread count, bandwidth limit, and retry settings. 
Also add DIGEST_VALIDATE option for RAFS metadata validation. Create a comprehensive Kubernetes pod manifest (pod.yaml) that demonstrates all perftest configuration options with proper Dragonfly service endpoints for cluster deployments. Signed-off-by: Peng Tao --- misc/perftest/Dockerfile | 5 ++ misc/perftest/README.md | 10 +++ misc/perftest/config.template.json | 8 +- misc/perftest/entrypoint.sh | 9 ++- misc/perftest/pod.yaml | 123 +++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 misc/perftest/pod.yaml diff --git a/misc/perftest/Dockerfile b/misc/perftest/Dockerfile index c8c9292ef40..0256b2e12a1 100644 --- a/misc/perftest/Dockerfile +++ b/misc/perftest/Dockerfile @@ -105,6 +105,11 @@ ENV NYDUS_IMAGE="ghcr.io/dragonflyoss/image-service/nginx:nydus-latest" \ DRAGONFLY_PROXY_URL="http://host.docker.internal:4001" \ DRAGONFLY_SCHEDULER_ENDPOINT="http://host.docker.internal:8002" \ REGISTRY_SCHEME="https" \ + DIGEST_VALIDATE="false" \ + STREAM_PREFETCH="false" \ + STREAM_PREFETCH_THREADS="5" \ + STREAM_PREFETCH_BANDWIDTH="0" \ + STREAM_PREFETCH_MAX_RETRY="10" \ MOUNT_POINT="/mnt/nydus" \ RESULTS_DIR="/results" \ WORK_DIR="/tmp/nydus" \ diff --git a/misc/perftest/README.md b/misc/perftest/README.md index 7eb9d8463d9..179b8ecf0df 100644 --- a/misc/perftest/README.md +++ b/misc/perftest/README.md @@ -83,6 +83,11 @@ needed on Linux to make `host.docker.internal` resolve to the host gateway. The summary is written to `./results/result.json` and printed to stderr. +For Kubernetes, `misc/perftest/pod.yaml` provides a Pod example with every +perftest environment option listed explicitly. Update the image, Dragonfly +service endpoints, registry credentials, and optional local nydusd hostPath +before applying it. + ## Configuration ### Using a locally built nydusd @@ -137,9 +142,14 @@ env vars (defaults shown): | `DRAGONFLY_PROXY_URL` | `http://host.docker.internal:4001` | dfdaemon proxy listen URL. 
| | `DRAGONFLY_SCHEDULER_ENDPOINT` | `http://host.docker.internal:8002` | Non-empty value enables SDK mode. | | `PROXY_FALLBACK` | `true` | Fall back to direct registry if proxy is unhealthy. | +| `DIGEST_VALIDATE` | `false` | Enable RAFS metadata digest validation. | | `BLOB_CACHE_DIR` | `/var/lib/nydus/cache` | nydusd blobcache work_dir. | | `PREFETCH_ENABLE` | `false` | Background prefetch threads. | | `PREFETCH_THREADS` | `8` | | +| `STREAM_PREFETCH` | `false` | Enable Dragonfly streaming blob prefetch. | +| `STREAM_PREFETCH_THREADS` | `5` | Concurrent streaming prefetch workers. | +| `STREAM_PREFETCH_BANDWIDTH` | `0` | Streaming prefetch bandwidth limit in bytes/sec; 0 uses nydusd default. | +| `STREAM_PREFETCH_MAX_RETRY` | `10` | Maximum retry attempts per blob for streaming prefetch. | ### Workload knobs diff --git a/misc/perftest/config.template.json b/misc/perftest/config.template.json index da7a31d02ac..bec44dc80b5 100644 --- a/misc/perftest/config.template.json +++ b/misc/perftest/config.template.json @@ -30,13 +30,17 @@ } }, "mode": "direct", - "digest_validate": false, + "digest_validate": ${DIGEST_VALIDATE}, "iostats_files": false, "enable_xattr": true, "fs_prefetch": { "enable": ${PREFETCH_ENABLE}, "threads_count": ${PREFETCH_THREADS}, "merging_size": 131072, - "bandwidth_rate": 0 + "bandwidth_rate": 0, + "stream_prefetch": ${STREAM_PREFETCH}, + "stream_prefetch_threads": ${STREAM_PREFETCH_THREADS}, + "stream_prefetch_bandwidth": ${STREAM_PREFETCH_BANDWIDTH}, + "stream_prefetch_max_retry": ${STREAM_PREFETCH_MAX_RETRY} } } diff --git a/misc/perftest/entrypoint.sh b/misc/perftest/entrypoint.sh index b2fa9995990..9584314cab9 100755 --- a/misc/perftest/entrypoint.sh +++ b/misc/perftest/entrypoint.sh @@ -40,8 +40,13 @@ REGISTRY_SCHEME="${REGISTRY_SCHEME:-https}" REGISTRY_AUTH="${REGISTRY_AUTH:-}" REGISTRY_SKIP_VERIFY="${REGISTRY_SKIP_VERIFY:-false}" PROXY_FALLBACK="${PROXY_FALLBACK:-true}" +DIGEST_VALIDATE="${DIGEST_VALIDATE:-false}" 
PREFETCH_ENABLE="${PREFETCH_ENABLE:-false}" PREFETCH_THREADS="${PREFETCH_THREADS:-8}" +STREAM_PREFETCH="${STREAM_PREFETCH:-false}" +STREAM_PREFETCH_THREADS="${STREAM_PREFETCH_THREADS:-5}" +STREAM_PREFETCH_BANDWIDTH="${STREAM_PREFETCH_BANDWIDTH:-0}" +STREAM_PREFETCH_MAX_RETRY="${STREAM_PREFETCH_MAX_RETRY:-10}" NYDUSD="/usr/local/bin/nydusd" mkdir -p "${WORK_DIR}" "${RESULTS_DIR}" "${BLOB_CACHE_DIR}" "${MOUNT_POINT}" @@ -112,7 +117,9 @@ else export REGISTRY_HOST REGISTRY_REPO REGISTRY_SCHEME REGISTRY_AUTH \ REGISTRY_SKIP_VERIFY PROXY_FALLBACK \ DRAGONFLY_PROXY_URL DRAGONFLY_SCHEDULER_ENDPOINT \ - BLOB_CACHE_DIR PREFETCH_ENABLE PREFETCH_THREADS + BLOB_CACHE_DIR DIGEST_VALIDATE PREFETCH_ENABLE PREFETCH_THREADS \ + STREAM_PREFETCH STREAM_PREFETCH_THREADS STREAM_PREFETCH_BANDWIDTH \ + STREAM_PREFETCH_MAX_RETRY CONFIG_PATH="${WORK_DIR}/nydusd.json" envsubst < /etc/nydus/config.template.json > "${CONFIG_PATH}" diff --git a/misc/perftest/pod.yaml b/misc/perftest/pod.yaml new file mode 100644 index 00000000000..57e404f5864 --- /dev/null +++ b/misc/perftest/pod.yaml @@ -0,0 +1,123 @@ +apiVersion: v1 +kind: Pod +metadata: + name: nydus-perftest + labels: + app: nydus-perftest +spec: + restartPolicy: Never + containers: + - name: perftest + image: nydus-perftest:latest + imagePullPolicy: IfNotPresent + securityContext: + privileged: true + env: + # Image/bootstrap/config selection. + - name: NYDUS_IMAGE + value: "ghcr.io/dragonflyoss/image-service/nginx:nydus-latest" + - name: NYDUSD_CONFIG + value: "" + - name: BOOTSTRAP_PATH + value: "" + - name: PLATFORM + value: "linux/amd64" + + # Registry and Dragonfly proxy config used when NYDUSD_CONFIG is empty. 
+ - name: REGISTRY_HOST + value: "" + - name: REGISTRY_REPO + value: "" + - name: REGISTRY_SCHEME + value: "https" + - name: REGISTRY_AUTH + value: "" + - name: REGISTRY_SKIP_VERIFY + value: "false" + - name: DRAGONFLY_PROXY_URL + value: "http://dragonfly-dfdaemon.dragonfly-system.svc.cluster.local:4001" + - name: DRAGONFLY_SCHEDULER_ENDPOINT + value: "http://dragonfly-scheduler.dragonfly-system.svc.cluster.local:8002" + - name: PROXY_FALLBACK + value: "true" + - name: DIGEST_VALIDATE + value: "false" + - name: PREFETCH_ENABLE + value: "false" + - name: PREFETCH_THREADS + value: "8" + - name: STREAM_PREFETCH + value: "false" + - name: STREAM_PREFETCH_THREADS + value: "5" + - name: STREAM_PREFETCH_BANDWIDTH + value: "0" + - name: STREAM_PREFETCH_MAX_RETRY + value: "10" + + # Runtime paths. + - name: MOUNT_POINT + value: "/mnt/nydus" + - name: RESULTS_DIR + value: "/results" + - name: WORK_DIR + value: "/tmp/nydus" + - name: BLOB_CACHE_DIR + value: "/var/lib/nydus/cache" + + # Workload knobs. 
+ - name: READ_PARALLELISM + value: "16" + - name: READ_CHUNK_SIZE + value: "1048576" + - name: MAX_FILES + value: "0" + - name: MOUNT_READY_TIMEOUT + value: "60" + - name: NYDUSD_LOG_LEVEL + value: "info" + volumeMounts: + - name: dev-fuse + mountPath: /dev/fuse + - name: results + mountPath: /results + - name: cache + mountPath: /var/lib/nydus/cache + - name: work + mountPath: /tmp/nydus + # To test a locally built daemon, mount it over the bundled binary: + # - name: local-nydusd + # mountPath: /usr/local/bin/nydusd + # readOnly: true + # If NYDUSD_CONFIG or BOOTSTRAP_PATH is set, mount the referenced file: + # - name: nydusd-config + # mountPath: /etc/nydus/user.json + # subPath: user.json + # readOnly: true + # - name: bootstrap + # mountPath: /bootstrap/image.boot + # subPath: image.boot + # readOnly: true + volumes: + - name: dev-fuse + hostPath: + path: /dev/fuse + type: CharDevice + - name: results + emptyDir: {} + - name: cache + emptyDir: {} + - name: work + emptyDir: {} + # To test a locally built daemon, uncomment this hostPath volume and the + # matching mount above. The file must exist on the node that runs the pod. + # - name: local-nydusd + # hostPath: + # path: /path/on/node/target/release/nydusd + # type: File + # - name: nydusd-config + # configMap: + # name: nydus-perftest-config + # - name: bootstrap + # configMap: + # name: nydus-perftest-bootstrap From 54762d1cfa862b8c0f12d1a0339e58e125b36583 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 30 Apr 2026 02:09:57 +0000 Subject: [PATCH 06/13] ci: enable digest validation and stream prefetch in perftest Add DIGEST_VALIDATE=true and STREAM_PREFETCH=true environment variables to the performance test container configuration to enable additional validation and optimization features during testing. 
Signed-off-by: Peng Tao --- .github/workflows/perftest.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/perftest.yml b/.github/workflows/perftest.yml index 0a6917288b3..2fbf9a304e0 100644 --- a/.github/workflows/perftest.yml +++ b/.github/workflows/perftest.yml @@ -286,6 +286,8 @@ jobs: -e READ_PARALLELISM=8 \ -e MAX_FILES=200 \ -e MOUNT_READY_TIMEOUT=120 \ + -e DIGEST_VALIDATE=true \ + -e STREAM_PREFETCH=true \ -v /tmp/perftest-results:/results \ "${PERFTEST_IMAGE}" From 39cc2acec840b48cc6c9ce0ef8a53271d85f03a0 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 30 Apr 2026 03:40:02 +0000 Subject: [PATCH 07/13] perftest: enable hostNetwork for registry and SDK access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add hostNetwork to the example pod so the benchmark container can reach external registries and the Dragonfly SDK can discover its local IP address. This avoids CNI configurations that might block outbound traffic or hide the pod’s routable address. Install iproute2 in the image so tools like ip and ss are available for network diagnostics inside the container. Update documentation to explain when and why hostNetwork is needed and remind users to remove it only if their pod network already provides a working default route. 
Signed-off-by: Peng Tao --- misc/perftest/Dockerfile | 1 + misc/perftest/README.md | 4 +++- misc/perftest/pod.yaml | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/misc/perftest/Dockerfile b/misc/perftest/Dockerfile index 0256b2e12a1..0ad84416ba4 100644 --- a/misc/perftest/Dockerfile +++ b/misc/perftest/Dockerfile @@ -79,6 +79,7 @@ RUN apt-get update \ file \ fuse3 \ gettext-base \ + iproute2 \ jq \ libgcc-s1 \ tar \ diff --git a/misc/perftest/README.md b/misc/perftest/README.md index 179b8ecf0df..1791cb4a478 100644 --- a/misc/perftest/README.md +++ b/misc/perftest/README.md @@ -86,7 +86,9 @@ The summary is written to `./results/result.json` and printed to stderr. For Kubernetes, `misc/perftest/pod.yaml` provides a Pod example with every perftest environment option listed explicitly. Update the image, Dragonfly service endpoints, registry credentials, and optional local nydusd hostPath -before applying it. +before applying it. The example enables `hostNetwork` so registry access and +Dragonfly SDK local-IP discovery use the node network; remove it only if your +pod network provides a working default route. ## Configuration diff --git a/misc/perftest/pod.yaml b/misc/perftest/pod.yaml index 57e404f5864..3fd0aa8392a 100644 --- a/misc/perftest/pod.yaml +++ b/misc/perftest/pod.yaml @@ -5,6 +5,11 @@ metadata: labels: app: nydus-perftest spec: + # The perftest needs outbound registry access and Dragonfly SDK mode needs a + # discoverable local IP address. hostNetwork avoids CNI/network-policy setups + # that leave this privileged benchmark pod without a default route. 
+ hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet restartPolicy: Never containers: - name: perftest From 5497d5da08bce7dd5c12b3a7e50fdd7db8e1658c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 30 Apr 2026 03:59:04 +0000 Subject: [PATCH 08/13] perftest: add busybox to test container Add busybox package to the performance test container to support additional shell utilities needed for test scenarios and debugging. Signed-off-by: Peng Tao --- misc/perftest/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/misc/perftest/Dockerfile b/misc/perftest/Dockerfile index 0ad84416ba4..6be988329a9 100644 --- a/misc/perftest/Dockerfile +++ b/misc/perftest/Dockerfile @@ -86,6 +86,7 @@ RUN apt-get update \ tini \ util-linux \ zlib1g \ + busybox \ && rm -rf /var/lib/apt/lists/* \ && mkdir -p /etc/nydus /mnt/nydus /results /var/lib/nydus/cache /tmp/nydus From e8e9f0fc34d94c08eb51cdf79edaf5826d5a2953 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 30 Apr 2026 08:58:54 +0000 Subject: [PATCH 09/13] perftest: add default route workaround for pod networks Add ENABLE_DEFAULT_ROUTE_WORKAROUND option to automatically create a default IPv4 route when the pod has only endpoint-specific routes. This fixes Dragonfly SDK local IP discovery in environments without a default route. The workaround derives the default route from existing routes to Dragonfly scheduler, proxy, or registry endpoints. It requires CAP_NET_ADMIN capability or privileged container mode to modify routing tables. 
Signed-off-by: Peng Tao --- misc/perftest/Dockerfile | 1 + misc/perftest/README.md | 6 +- misc/perftest/entrypoint.sh | 110 ++++++++++++++++++++++++++++++++++++ misc/perftest/pod.yaml | 2 + 4 files changed, 118 insertions(+), 1 deletion(-) diff --git a/misc/perftest/Dockerfile b/misc/perftest/Dockerfile index 6be988329a9..4d033e1c3fd 100644 --- a/misc/perftest/Dockerfile +++ b/misc/perftest/Dockerfile @@ -107,6 +107,7 @@ ENV NYDUS_IMAGE="ghcr.io/dragonflyoss/image-service/nginx:nydus-latest" \ DRAGONFLY_PROXY_URL="http://host.docker.internal:4001" \ DRAGONFLY_SCHEDULER_ENDPOINT="http://host.docker.internal:8002" \ REGISTRY_SCHEME="https" \ + ENABLE_DEFAULT_ROUTE_WORKAROUND="false" \ DIGEST_VALIDATE="false" \ STREAM_PREFETCH="false" \ STREAM_PREFETCH_THREADS="5" \ diff --git a/misc/perftest/README.md b/misc/perftest/README.md index 1791cb4a478..d2316eb81bb 100644 --- a/misc/perftest/README.md +++ b/misc/perftest/README.md @@ -88,7 +88,10 @@ perftest environment option listed explicitly. Update the image, Dragonfly service endpoints, registry credentials, and optional local nydusd hostPath before applying it. The example enables `hostNetwork` so registry access and Dragonfly SDK local-IP discovery use the node network; remove it only if your -pod network provides a working default route. +pod network provides a working default route. If the pod has endpoint-specific +routes but no default IPv4 route, set `ENABLE_DEFAULT_ROUTE_WORKAROUND=true` +to let the entrypoint add a default route derived from the Dragonfly or registry +route; this requires `CAP_NET_ADMIN` or a privileged container. ## Configuration @@ -144,6 +147,7 @@ env vars (defaults shown): | `DRAGONFLY_PROXY_URL` | `http://host.docker.internal:4001` | dfdaemon proxy listen URL. | | `DRAGONFLY_SCHEDULER_ENDPOINT` | `http://host.docker.internal:8002` | Non-empty value enables SDK mode. | | `PROXY_FALLBACK` | `true` | Fall back to direct registry if proxy is unhealthy. 
| +| `ENABLE_DEFAULT_ROUTE_WORKAROUND` | `false` | Add a default IPv4 route derived from Dragonfly/registry endpoint routes when none exists. Requires `CAP_NET_ADMIN` or privileged mode. | | `DIGEST_VALIDATE` | `false` | Enable RAFS metadata digest validation. | | `BLOB_CACHE_DIR` | `/var/lib/nydus/cache` | nydusd blobcache work_dir. | | `PREFETCH_ENABLE` | `false` | Background prefetch threads. | diff --git a/misc/perftest/entrypoint.sh b/misc/perftest/entrypoint.sh index 9584314cab9..e133163a2b9 100755 --- a/misc/perftest/entrypoint.sh +++ b/misc/perftest/entrypoint.sh @@ -19,6 +19,110 @@ set -euo pipefail log() { printf '[perftest] %s\n' "$*" >&2; } die() { printf '[perftest] ERROR: %s\n' "$*" >&2; exit 1; } +is_ipv4() { + [[ "$1" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]] +} + +endpoint_host() { + local endpoint="$1" + local authority host + + [ -n "${endpoint}" ] || return 1 + authority="${endpoint#*://}" + authority="${authority%%/*}" + if [[ "${authority}" == \[*\]* ]]; then + host="${authority#\[}" + host="${host%%\]*}" + else + host="${authority%%:*}" + fi + [ -n "${host}" ] || return 1 + printf '%s\n' "${host}" +} + +resolve_ipv4() { + local host="$1" + local ip="" + + if is_ipv4 "${host}"; then + printf '%s\n' "${host}" + return 0 + fi + + if command -v getent >/dev/null 2>&1; then + ip="$(getent ahostsv4 "${host}" 2>/dev/null | awk 'NR == 1 {print $1}' || true)" + fi + if [ -z "${ip}" ] && command -v nslookup >/dev/null 2>&1; then + ip="$(nslookup "${host}" 2>/dev/null | awk ' + /^Address[[:space:]]+[0-9]+: / {print $3; exit} + /^Address: / && $2 !~ /:53$/ {print $2; exit} + ' || true)" + fi + [ -n "${ip}" ] || return 1 + printf '%s\n' "${ip}" +} + +ensure_default_ipv4_route() { + local endpoint host ip route gateway="" dev="" + local -a fields + + command -v ip >/dev/null 2>&1 || { + log "ip command not found; skipping default route workaround" + return + } + [ -z "$(ip -4 route show default 2>/dev/null)" ] || return + + log "No default IPv4 route found; 
trying to derive one from reachable endpoints" + for endpoint in "${DRAGONFLY_SCHEDULER_ENDPOINT}" "${DRAGONFLY_PROXY_URL}" "${REGISTRY_HOST:-}"; do + host="$(endpoint_host "${endpoint}" 2>/dev/null || true)" + [ -n "${host}" ] || continue + ip="$(resolve_ipv4 "${host}" 2>/dev/null || true)" + [ -n "${ip}" ] || { + log "Could not resolve IPv4 address for ${host}; skipping" + continue + } + + route="$(ip -4 route get "${ip}" 2>/dev/null || true)" + route="${route%%$'\n'*}" + [ -n "${route}" ] || { + log "No IPv4 route to ${host} (${ip}); skipping" + continue + } + + fields=(${route}) + gateway="" + dev="" + for ((i = 0; i < ${#fields[@]}; i++)); do + case "${fields[$i]}" in + via) + gateway="${fields[$((i + 1))]:-}" + ;; + dev) + dev="${fields[$((i + 1))]:-}" + ;; + esac + done + + [ -n "${dev}" ] || { + log "Route to ${host} (${ip}) has no device: ${route}" + continue + } + + if [ -n "${gateway}" ]; then + if ip route add default via "${gateway}" dev "${dev}" 2>/dev/null; then + log "Added default IPv4 route via ${gateway} dev ${dev} (derived from ${host}/${ip})" + return + fi + elif ip route add default dev "${dev}" 2>/dev/null; then + log "Added default IPv4 route dev ${dev} (derived from ${host}/${ip})" + return + fi + log "Failed to add default route from ${host}/${ip}: ${route}" + done + + log "No default IPv4 route could be derived; Dragonfly SDK local IP discovery may fail" +} + # ---- Inputs ---------------------------------------------------------------- NYDUS_IMAGE="${NYDUS_IMAGE:-}" NYDUSD_CONFIG="${NYDUSD_CONFIG:-}" @@ -40,6 +144,7 @@ REGISTRY_SCHEME="${REGISTRY_SCHEME:-https}" REGISTRY_AUTH="${REGISTRY_AUTH:-}" REGISTRY_SKIP_VERIFY="${REGISTRY_SKIP_VERIFY:-false}" PROXY_FALLBACK="${PROXY_FALLBACK:-true}" +ENABLE_DEFAULT_ROUTE_WORKAROUND="${ENABLE_DEFAULT_ROUTE_WORKAROUND:-false}" DIGEST_VALIDATE="${DIGEST_VALIDATE:-false}" PREFETCH_ENABLE="${PREFETCH_ENABLE:-false}" PREFETCH_THREADS="${PREFETCH_THREADS:-8}" @@ -127,6 +232,11 @@ else log " registry: 
${REGISTRY_SCHEME}://${REGISTRY_HOST}/${REGISTRY_REPO}" log " proxy: ${DRAGONFLY_PROXY_URL} scheduler: ${DRAGONFLY_SCHEDULER_ENDPOINT}" fi +if [ "${ENABLE_DEFAULT_ROUTE_WORKAROUND}" = "true" ]; then + ensure_default_ipv4_route +else + log "Default IPv4 route workaround disabled" +fi # ---- Phase 2: resolve bootstrap ------------------------------------------- if [ -n "${BOOTSTRAP_PATH}" ] && [ -f "${BOOTSTRAP_PATH}" ]; then diff --git a/misc/perftest/pod.yaml b/misc/perftest/pod.yaml index 3fd0aa8392a..505021df59b 100644 --- a/misc/perftest/pod.yaml +++ b/misc/perftest/pod.yaml @@ -45,6 +45,8 @@ spec: value: "http://dragonfly-scheduler.dragonfly-system.svc.cluster.local:8002" - name: PROXY_FALLBACK value: "true" + - name: ENABLE_DEFAULT_ROUTE_WORKAROUND + value: "false" - name: DIGEST_VALIDATE value: "false" - name: PREFETCH_ENABLE From 8450cc8e55cf4ea5dc499943b22b5a6f79894d40 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 8 May 2026 09:37:06 +0000 Subject: [PATCH 10/13] perftest: add cache and backend I/O metrics to report Extend the performance test report with detailed cache hit ratio, backend I/O size distribution, average latency, total fetched data, and network efficiency metrics. These additions help analyze nydusd blob cache effectiveness and backend storage access patterns. 
Signed-off-by: Peng Tao --- misc/perftest/entrypoint.sh | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/misc/perftest/entrypoint.sh b/misc/perftest/entrypoint.sh index e133163a2b9..79b4a5feb30 100755 --- a/misc/perftest/entrypoint.sh +++ b/misc/perftest/entrypoint.sh @@ -384,6 +384,48 @@ jq -r ' "bytes_read : \(.workload.bytes_read // 0)", "throughput_MBps : \(.workload.throughput_mbps // 0)", "latency_ms p50/p95/p99 : \(.workload.latency_ms.p50 // 0) / \(.workload.latency_ms.p95 // 0) / \(.workload.latency_ms.p99 // 0)", + "cache_hit_ratio : \( + if (.nydusd.blobcache.total // 0) > 0 then + ((.nydusd.blobcache.partial_hits // 0) + (.nydusd.blobcache.whole_hits // 0)) + / .nydusd.blobcache.total * 100 + | . * 10 | round / 10 | tostring + "%" + else "n/a (no cache reads)" + end + ) (partial=\(.nydusd.blobcache.partial_hits // 0), whole=\(.nydusd.blobcache.whole_hits // 0), total=\(.nydusd.blobcache.total // 0))", + "backend_io_size p50/p95/p99 : \( + (.nydusd.backend.read_count_block_size_dist // [0,0,0,0,0,0,0,0]) as $d | + ($d | add // 0) as $n | + if $n == 0 then "n/a (no backend reads)" + else + (reduce range(8) as $i ({cum: [], s: 0}; .s += $d[$i] | .cum += [.s])) | + .cum as $c | + (["<1K","1-4K","4-16K","16-64K","64-128K","128-512K","512K-1M",">=1M"]) as $lbl | + (($n * 0.50) as $t | first(range(8) | select($c[.] >= $t))) as $p50 | + (($n * 0.95) as $t | first(range(8) | select($c[.] >= $t))) as $p95 | + (($n * 0.99) as $t | first(range(8) | select($c[.] >= $t))) as $p99 | + "\($lbl[$p50]) / \($lbl[$p95]) / \($lbl[$p99])" + end + ) (total_backend_reads=\((.nydusd.backend.read_count // 0)))", + "backend_avg_latency_ms : \( + (.nydusd.backend.read_count // 0) as $rc | + if $rc > 0 then + (.nydusd.backend.read_cumulative_latency_millis_total // 0) / $rc + | . * 10 | round / 10 + else "n/a (no backend reads)" + end + )", + "backend_fetch_MB : \( + (.nydusd.backend.read_amount_total // 0) / 1000000 + | . 
* 100 | round / 100 + )", + "network_efficiency: \( + (.nydusd.backend.read_amount_total // 0) as $net | + (.nydusd.fs.data_read // 0) as $app | + if $net > 0 then + ($app / $net | . * 100 | round / 100 | tostring) + "x (app_bytes=\($app), backend_bytes=\($net))" + else "n/a (no backend reads)" + end + )", "workload_rc : \(.workload_rc)" ' "${RESULT_JSON}" >&2 echo "===================================================" >&2 From 065589c1ce52bde0015478700b253b198fefee80 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 9 May 2026 02:15:42 +0000 Subject: [PATCH 11/13] perftest: add detailed prefetch and cache metrics reporting Enhance the performance test output with comprehensive prefetch statistics and cache analysis. The new metrics include prefetch data amount, average merge size, latency, bandwidth, and IO breakdown between prefetch and on-demand reads. Add cache entries count and backend error tracking to provide better visibility into system behavior during performance testing. Signed-off-by: Peng Tao --- misc/perftest/entrypoint.sh | 67 ++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/misc/perftest/entrypoint.sh b/misc/perftest/entrypoint.sh index 79b4a5feb30..799fb220987 100755 --- a/misc/perftest/entrypoint.sh +++ b/misc/perftest/entrypoint.sh @@ -384,6 +384,7 @@ jq -r ' "bytes_read : \(.workload.bytes_read // 0)", "throughput_MBps : \(.workload.throughput_mbps // 0)", "latency_ms p50/p95/p99 : \(.workload.latency_ms.p50 // 0) / \(.workload.latency_ms.p95 // 0) / \(.workload.latency_ms.p99 // 0)", + "--- blob cache ---", "cache_hit_ratio : \( if (.nydusd.blobcache.total // 0) > 0 then ((.nydusd.blobcache.partial_hits // 0) + (.nydusd.blobcache.whole_hits // 0)) @@ -392,6 +393,70 @@ jq -r ' else "n/a (no cache reads)" end ) (partial=\(.nydusd.blobcache.partial_hits // 0), whole=\(.nydusd.blobcache.whole_hits // 0), total=\(.nydusd.blobcache.total // 0))", + "cache_entries : \(.nydusd.blobcache.entries_count // 
0) chunks", + "--- prefetch ---", + "prefetch_data_MB : \( + (.nydusd.blobcache.prefetch_data_amount // 0) as $pd | + (.nydusd.backend.read_amount_total // 0) as $net | + ($pd / 1000000 | . * 100 | round / 100 | tostring) + + if $net > 0 and $pd > 0 then + " (\($pd / $net * 100 | . * 10 | round / 10)% of backend traffic)" + else "" + end + )", + "prefetch_avg_merge_KB : \( + (.nydusd.blobcache.prefetch_requests_count // 0) as $rc | + if $rc > 0 then + (.nydusd.blobcache.prefetch_data_amount // 0) / $rc / 1000 + | . * 10 | round / 10 + else "n/a" + end + ) (requests=\(.nydusd.blobcache.prefetch_requests_count // 0), unmerged_chunks=\(.nydusd.blobcache.prefetch_unmerged_chunks // 0))", + "prefetch_avg_latency_ms : \( + (.nydusd.blobcache.prefetch_requests_count // 0) as $rc | + if $rc > 0 then + (.nydusd.blobcache.prefetch_cumulative_time_millis // 0) / $rc + | . * 10 | round / 10 + else "n/a" + end + )", + "prefetch_bandwidth_MBps : \( + ((.nydusd.blobcache.prefetch_end_time_secs // 0) + + (.nydusd.blobcache.prefetch_end_time_millis // 0) / 1000) as $end | + ((.nydusd.blobcache.prefetch_begin_time_secs // 0) + + (.nydusd.blobcache.prefetch_begin_time_millis // 0) / 1000) as $begin | + ($end - $begin) as $dur | + (.nydusd.blobcache.prefetch_data_amount // 0) as $pd | + if $dur > 0 and $pd > 0 then + $pd / 1000000 / $dur | . * 100 | round / 100 + else "n/a" + end + )", + "--- io interaction ---", + "io_breakdown (prefetch/ondemand/total): \( + (.nydusd.blobcache.prefetch_data_amount // 0) as $pd | + (.nydusd.backend.read_amount_total // 0) as $net | + ($net - $pd) as $od | + ($pd / 1000000 | . * 100 | round / 100 | tostring) + " MB / " + + ($od / 1000000 | . * 100 | round / 100 | tostring) + " MB / " + + ($net / 1000000 | . * 100 | round / 100 | tostring) + " MB" + + if $net > 0 then + " (prefetch_share=\($pd / $net * 100 | . 
* 10 | round / 10)%)" + else "" + end + )", + "ondemand_backend_reads : \( + (.nydusd.backend.read_count // 0) as $total_reads | + (.nydusd.blobcache.prefetch_requests_count // 0) as $prefetch_reads | + [($total_reads - $prefetch_reads), 0] | max as $od_reads | + ($od_reads | tostring) + + if $total_reads > 0 then + " (\($od_reads / $total_reads * 100 | . * 10 | round / 10)% of total backend ops triggered by cache misses)" + else "" + end + )", + "prefetch_data_ready: \(.nydusd.blobcache.data_all_ready // false)", + "--- backend ---", "backend_io_size p50/p95/p99 : \( (.nydusd.backend.read_count_block_size_dist // [0,0,0,0,0,0,0,0]) as $d | ($d | add // 0) as $n | @@ -417,7 +482,7 @@ jq -r ' "backend_fetch_MB : \( (.nydusd.backend.read_amount_total // 0) / 1000000 | . * 100 | round / 100 - )", + ) (errors=\(.nydusd.backend.read_errors // 0))", "network_efficiency: \( (.nydusd.backend.read_amount_total // 0) as $net | (.nydusd.fs.data_read // 0) as $app | From a2b244729641bb1eaf504c02de153b9025bda5a5 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 9 May 2026 10:40:45 +0000 Subject: [PATCH 12/13] prefetch: add metrics instrumentation for blob streaming Add metrics tracking for the blob prefetcher to monitor streaming performance and cache efficiency. The new instrumentation captures timing, data volume, and request counts. Key changes: - Record wall-clock start time when first blob begins streaming using SystemTime - Track prefetch_data_amount by accumulating compressed chunk sizes as they are cached - Increment prefetch_requests_count for each successful blob stream - Set prefetch_end_time_secs after each blob completes streaming - Add metrics() accessor to BlobCache trait to expose BlobcacheMetrics - Ensure begin time is only recorded once per prefetch session using std::sync::Once The metrics provide visibility into prefetch duration, throughput, and cache hit rates for performance analysis and optimization. 
Signed-off-by: Peng Tao --- rafs/src/prefetch.rs | 292 +++++++++++++++++++++++++++++++- storage/src/cache/cachedfile.rs | 9 + storage/src/cache/mod.rs | 7 + 3 files changed, 304 insertions(+), 4 deletions(-) diff --git a/rafs/src/prefetch.rs b/rafs/src/prefetch.rs index 15cd5f7582d..150066c2ddf 100644 --- a/rafs/src/prefetch.rs +++ b/rafs/src/prefetch.rs @@ -14,13 +14,14 @@ use std::collections::BTreeMap; use std::io::Read; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::{Arc, Condvar, Mutex, Once}; use std::thread; -use std::time::{Duration, Instant}; +use std::time::{Duration, Instant, SystemTime}; use nydus_storage::backend::RequestSource; use nydus_storage::cache::BlobCache; use nydus_storage::device::{BlobChunkInfo, BlobInfo}; +use nydus_utils::metrics::Metric; use crate::metadata::{RafsInodeExt, RafsSuper}; @@ -110,6 +111,7 @@ struct State { threads_count: usize, rate_limiter: Option>>, max_retry_per_blob: u64, + begin_timing_once: Once, } /// Streaming blob prefetcher that downloads entire blobs via rangeless GET @@ -149,6 +151,7 @@ impl BlobPrefetcher { threads_count, rate_limiter, max_retry_per_blob: DEFAULT_MAX_RETRY, + begin_timing_once: Once::new(), }), }) } @@ -445,6 +448,20 @@ impl BlobPrefetcher { chunk_status: &mut [bool], ) -> anyhow::Result<()> { let blob_id = blob.info.blob_id(); + let stream_start = SystemTime::now(); + + // Record the wall-clock start of prefetch on the first blob streamed. 
+ if let Some(metrics) = cache.metrics() { + state.begin_timing_once.call_once(|| { + if let Ok(t) = stream_start.duration_since(SystemTime::UNIX_EPOCH) { + metrics.prefetch_begin_time_secs.set(t.as_secs()); + metrics + .prefetch_begin_time_millis + .set(t.subsec_millis() as u64); + } + }); + } + let last_chunk_end = blob .chunks .iter() @@ -565,6 +582,18 @@ impl BlobPrefetcher { "BlobPrefetcher: streamed blob {}, cached {} chunks", blob_id, chunks_cached ); + + // Update BlobcacheMetrics to include this blob in the prefetch statistics. + // One blob stream = one backend request (rangeless GET), regardless of how + // many chunks were extracted from it. prefetch_data_amount is accumulated + // per chunk in cache_chunk_data; here we count the request and update timing. + if chunks_cached > 0 { + if let Some(metrics) = cache.metrics() { + metrics.prefetch_requests_count.inc(); + metrics.calculate_prefetch_metrics(stream_start); + } + } + Ok(()) } } @@ -582,7 +611,7 @@ mod tests { }; use nydus_storage::{StorageError, StorageResult}; use nydus_utils::crypt::{Cipher, CipherContext}; - use nydus_utils::metrics::BackendMetrics; + use nydus_utils::metrics::{BackendMetrics, BlobcacheMetrics, Metric}; use nydus_utils::{compress, crypt, digest}; use crate::mock::MockChunkInfo; @@ -650,6 +679,7 @@ mod tests { /// Whether `cache_chunk_data` returns `Ok(true)` (true) or `Err` (false). 
cache_succeeds: bool, cache_calls: Arc, + blobcache_metrics: Arc, } impl BlobCache for MockBlobCache { @@ -719,10 +749,13 @@ mod tests { fn cache_chunk_data( &self, _chunk: &dyn BlobChunkInfo, - _data: &[u8], + data: &[u8], ) -> std::io::Result { self.cache_calls.fetch_add(1, Ordering::Relaxed); if self.cache_succeeds { + self.blobcache_metrics + .prefetch_data_amount + .add(data.len() as u64); Ok(true) } else { Err(std::io::Error::new( @@ -731,6 +764,10 @@ mod tests { )) } } + + fn metrics(&self) -> Option> { + Some(self.blobcache_metrics.clone()) + } } // ── LimitedReader ───────────────────────────────────────────────────────── @@ -762,6 +799,7 @@ mod tests { threads_count: 2, rate_limiter: None, max_retry_per_blob: 3, + begin_timing_once: Once::new(), }) } @@ -785,11 +823,13 @@ mod tests { let stream_calls = Arc::new(AtomicUsize::new(0)); let cache_calls = Arc::new(AtomicUsize::new(0)); let reader = Arc::new(MockBlobReader::new(stream_data, Arc::clone(&stream_calls))); + let blobcache_metrics = BlobcacheMetrics::new("mock", "/tmp"); let cache = Arc::new(MockBlobCache { chunk_map: Arc::new(MockChunkMap { ready }), reader, cache_succeeds, cache_calls: Arc::clone(&cache_calls), + blobcache_metrics, }); (cache, stream_calls, cache_calls) } @@ -1088,6 +1128,250 @@ mod tests { // ── prefetch_one_blob ──────────────────────────────────────────────────── + // ── BlobcacheMetrics instrumentation ───────────────────────────────────── + + #[test] + fn test_metrics_prefetch_data_amount_updated_per_cached_chunk() { + // Each newly cached chunk must add its compressed size to prefetch_data_amount. 
+ let state = make_state(false); + let chunk_size = 20usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data: Vec = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_data_amount.count(), + chunk_size as u64, + "prefetch_data_amount must equal the compressed chunk size" + ); + } + + #[test] + fn test_metrics_prefetch_data_amount_accumulates_across_chunks() { + // With two chunks cached, prefetch_data_amount must be the sum of both sizes. + let state = make_state(false); + let sz0 = 8usize; + let sz1 = 12usize; + let chunk0 = Arc::new(MockChunkInfo::mock(0, 0, sz0 as u32, 0, sz0 as u32)); + let chunk1 = Arc::new(MockChunkInfo::mock(0, 8, sz1 as u32, 0, sz1 as u32)); + let blob = make_blob_work(vec![chunk0, chunk1]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 2]; + let data: Vec = vec![0u8; sz0 + sz1]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_data_amount.count(), + (sz0 + sz1) as u64, + "prefetch_data_amount must be the sum of all cached chunk sizes" + ); + } + + #[test] + fn test_metrics_prefetch_data_amount_not_updated_on_cache_error() { + // When cache_chunk_data returns Err, prefetch_data_amount must stay zero. 
+ let state = make_state(false); + let chunk_size = 10usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], false); // cache_succeeds = false + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_data_amount.count(), + 0, + "prefetch_data_amount must not be updated when caching fails" + ); + } + + #[test] + fn test_metrics_prefetch_requests_count_incremented_per_blob() { + // After streaming one blob with at least one newly cached chunk, + // prefetch_requests_count must be exactly 1. + let state = make_state(false); + let chunk_size = 10usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_requests_count.count(), + 1, + "exactly one blob stream = one prefetch request" + ); + } + + #[test] + fn test_metrics_prefetch_requests_count_zero_when_no_chunks_cached() { + // If nothing gets cached (empty stream), prefetch_requests_count must stay 0. 
+ let state = make_state(false); + let chunk = Arc::new(MockChunkInfo::mock(0, 0, 10, 0, 10)); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); // empty stream data + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let reader: Box = Box::new(std::io::Cursor::new(vec![])); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_requests_count.count(), + 0, + "no chunks cached → no prefetch request counted" + ); + } + + #[test] + fn test_metrics_prefetch_begin_time_set_on_first_blob() { + // The prefetch_begin_time_secs must be non-zero after streaming the first blob. + let state = make_state(false); + let chunk_size = 10usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert!( + metrics.prefetch_begin_time_secs.count() > 0, + "prefetch_begin_time_secs must be set after the first blob stream" + ); + } + + #[test] + fn test_metrics_prefetch_begin_time_set_only_once() { + // Calling stream_and_cache twice on the same state must set begin_time only + // on the first call (the Once cell prevents overwriting). 
+ let state = make_state(false); + let chunk_size = 10usize; + let make_chunk = || { + Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )) + }; + let blob1 = make_blob_work(vec![make_chunk()]); + let blob2 = make_blob_work(vec![make_chunk()]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + + let reader1: Box = Box::new(std::io::Cursor::new(vec![0u8; chunk_size])); + let mut status1 = vec![false; 1]; + BlobPrefetcher::stream_and_cache(&state, reader1, &blob1, &cache_arc, 0, &mut status1) + .unwrap(); + + let first_begin = metrics.prefetch_begin_time_secs.count(); + + // Small delay to ensure a different timestamp would be written if Once didn't guard it. + std::thread::sleep(Duration::from_millis(10)); + + let reader2: Box = Box::new(std::io::Cursor::new(vec![0u8; chunk_size])); + let mut status2 = vec![false; 1]; + BlobPrefetcher::stream_and_cache(&state, reader2, &blob2, &cache_arc, 0, &mut status2) + .unwrap(); + + assert_eq!( + metrics.prefetch_begin_time_secs.count(), + first_begin, + "begin time must not be overwritten on subsequent blob streams" + ); + } + + #[test] + fn test_metrics_prefetch_end_time_set_after_streaming() { + // prefetch_end_time_secs must be non-zero after a blob is streamed. 
+ let state = make_state(false); + let chunk_size = 10usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert!( + metrics.prefetch_end_time_secs.count() > 0, + "prefetch_end_time_secs must be set after streaming" + ); + } + + // ── prefetch_one_blob ──────────────────────────────────────────────────── + #[test] fn test_prefetch_one_blob_all_chunks_ready() { // When every chunk is already cached (is_ready = true), no stream_read diff --git a/storage/src/cache/cachedfile.rs b/storage/src/cache/cachedfile.rs index fa4af42e5e1..c7e45baf1a2 100644 --- a/storage/src/cache/cachedfile.rs +++ b/storage/src/cache/cachedfile.rs @@ -557,6 +557,10 @@ impl BlobCache for FileCacheEntry { &self.blob_id } + fn metrics(&self) -> Option> { + Some(self.metrics.clone()) + } + fn blob_uncompressed_size(&self) -> Result { Ok(self.blob_uncompressed_size) } @@ -877,6 +881,11 @@ impl BlobCache for FileCacheEntry { })(); self.update_chunk_pending_status(chunk, result.is_ok()); + if result.is_ok() { + self.metrics + .prefetch_data_amount + .add(compressed_data.len() as u64); + } result.map(|_| true) } } diff --git a/storage/src/cache/mod.rs b/storage/src/cache/mod.rs index 5918cc82872..e6945a1f998 100644 --- a/storage/src/cache/mod.rs +++ b/storage/src/cache/mod.rs @@ -24,6 +24,7 @@ use std::time::Instant; use fuse_backend_rs::file_buf::FileVolatileSlice; use nydus_utils::compress::zlib_random::ZranDecoder; use nydus_utils::crypt::{self, Cipher, CipherContext}; +use nydus_utils::metrics::BlobcacheMetrics; use 
nydus_utils::{compress, digest}; use crate::backend::{BlobBackend, BlobReader, RequestSource}; @@ -429,6 +430,12 @@ pub trait BlobCache: Send + Sync { fn get_blob_meta_info(&self) -> Result>> { Ok(None) } + + /// Get the [BlobcacheMetrics](../../nydus_utils/metrics/struct.BlobcacheMetrics.html) for this + /// cache object. Returns `None` for cache implementations that do not track metrics. + fn metrics(&self) -> Option> { + None + } } /// An iterator to enumerate decompressed data for chunks. From 7d2cbebeacd10b63a18cebe9fea9f6bdc4b780d8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 22 May 2026 07:30:08 +0000 Subject: [PATCH 13/13] ci: change perftest trigger from push/PR to daily schedule Replace push and pull request triggers with a daily cron schedule at 00:40 UTC. This reduces unnecessary workflow runs while maintaining regular performance testing. The workflow will now run automatically once per day and can still be triggered manually via workflow_dispatch when needed. Signed-off-by: Peng Tao --- .github/workflows/perftest.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/perftest.yml b/.github/workflows/perftest.yml index 2fbf9a304e0..8eda3cdba5f 100644 --- a/.github/workflows/perftest.yml +++ b/.github/workflows/perftest.yml @@ -1,12 +1,9 @@ name: Perftest Image (Dragonfly SDK Proxy) on: - push: - branches: ["**", "stable/**"] - paths-ignore: [ '**.md', '**.png', '**.jpg', '**.svg', '**/docs/**' ] - pull_request: - branches: ["**", "stable/**"] - paths-ignore: [ '**.md', '**.png', '**.jpg', '**.svg', '**/docs/**' ] + schedule: + # Every day at 00:40 UTC + - cron: "40 0 * * *" workflow_dispatch: inputs: nydus_image: