diff --git a/.github/workflows/perftest.yml b/.github/workflows/perftest.yml new file mode 100644 index 00000000000..8eda3cdba5f --- /dev/null +++ b/.github/workflows/perftest.yml @@ -0,0 +1,348 @@ +name: Perftest Image (Dragonfly SDK Proxy) + +on: + schedule: + # Everyday at 00:40 clock UTC + - cron: "40 0 * * *" + workflow_dispatch: + inputs: + nydus_image: + description: 'Nydus image to benchmark' + required: false + default: 'ghcr.io/dragonflyoss/image-service/nginx:nydus-latest' + +permissions: + contents: read + packages: write + +env: + DRAGONFLY_VERSION: "2.4.3" + CLIENT_VERSION: "1.3.3" + # PERFTEST_IMAGE is computed per-job from the repository owner (lower-cased) + # and the commit SHA so each commit gets a unique, content-addressed tag. + +jobs: + build-image: + name: Build perftest image + runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} + timeout-minutes: 60 + permissions: + contents: read + packages: write + outputs: + image: ${{ steps.meta.outputs.image }} + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Compute image reference + id: meta + run: | + # GHCR requires lowercase repository names. + owner_lc="${GITHUB_REPOSITORY_OWNER,,}" + image="ghcr.io/${owner_lc}/nydus-perftest:${GITHUB_SHA}" + echo "image=${image}" >> "$GITHUB_OUTPUT" + echo "Resolved perftest image: ${image}" + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Check whether image tag already exists + id: check + run: | + if docker manifest inspect "${{ steps.meta.outputs.image }}" > /dev/null 2>&1; then + echo "exists=true" >> "$GITHUB_OUTPUT" + echo "Image ${{ steps.meta.outputs.image }} already exists in GHCR; skipping build." + else + echo "exists=false" >> "$GITHUB_OUTPUT" + echo "Image ${{ steps.meta.outputs.image }} not found; will build and push." 
+          fi
+
+      - name: Set up Docker Buildx
+        if: steps.check.outputs.exists != 'true'
+        uses: docker/setup-buildx-action@v3
+
+      - name: Cache Buildx layer cache  # persists the type=local cache directory used by the build step
+        if: steps.check.outputs.exists != 'true'
+        uses: actions/cache@v4
+        with:
+          path: |
+            /tmp/buildx-perftest
+          key: buildx-perftest-${{ hashFiles('Cargo.lock', 'misc/perftest/Dockerfile') }}
+          restore-keys: |
+            buildx-perftest-
+
+      - name: Build and push perftest image
+        if: steps.check.outputs.exists != 'true'
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: misc/perftest/Dockerfile
+          tags: ${{ steps.meta.outputs.image }}
+          push: true
+          build-args: |
+            RUST_TARGET=x86_64-unknown-linux-musl
+          cache-from: type=local,src=/tmp/buildx-perftest  # absolute path: this value never passes through a shell, so "~" would not be expanded
+          cache-to: type=local,dest=/tmp/buildx-perftest,mode=max  # must be the same directory that actions/cache saves above
+
+      - name: Verify image bundles required binaries
+        env:
+          PERFTEST_IMAGE: ${{ steps.meta.outputs.image }}
+        run: |
+          docker pull "${PERFTEST_IMAGE}"
+          docker run --rm --entrypoint /usr/local/bin/nydusd "${PERFTEST_IMAGE}" --version
+          docker run --rm --entrypoint /usr/local/bin/nydusctl "${PERFTEST_IMAGE}" --help | head -5
+          docker run --rm --entrypoint /usr/local/bin/crane "${PERFTEST_IMAGE}" version
+          docker run --rm --entrypoint /usr/local/bin/workload "${PERFTEST_IMAGE}" --help 2>&1 | head -10 || true
+          # Sanity-check the binaries are static (no dynamic linker references).
+ docker run --rm --entrypoint /bin/sh ${PERFTEST_IMAGE} -c \ + 'for b in /usr/local/bin/nydusd /usr/local/bin/nydusctl /usr/local/bin/workload; do + echo "=== $b ===" + file "$b" 2>/dev/null || true + ldd "$b" 2>&1 || true + done' + + dragonfly-download: + name: Download Dragonfly binaries + runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} + timeout-minutes: 10 + steps: + - name: Cache Dragonfly binaries + id: cache-dragonfly + uses: actions/cache@v4 + with: + path: /tmp/dragonfly-bin + key: dragonfly-${{ env.DRAGONFLY_VERSION }}-client-${{ env.CLIENT_VERSION }}-linux-amd64 + + - name: Download Dragonfly server binaries + if: steps.cache-dragonfly.outputs.cache-hit != 'true' + run: | + mkdir -p /tmp/dragonfly-bin + wget -q -O /tmp/dragonfly-server.tar.gz \ + "https://github.com/dragonflyoss/dragonfly/releases/download/v${DRAGONFLY_VERSION}/dragonfly-${DRAGONFLY_VERSION}-linux-amd64.tar.gz" + tar -xzf /tmp/dragonfly-server.tar.gz -C /tmp/dragonfly-bin manager scheduler + rm /tmp/dragonfly-server.tar.gz + + - name: Download Dragonfly client binaries + if: steps.cache-dragonfly.outputs.cache-hit != 'true' + run: | + wget -q -O /tmp/dragonfly-client.tar.gz \ + "https://github.com/dragonflyoss/client/releases/download/v${CLIENT_VERSION}/dragonfly-client-v${CLIENT_VERSION}-x86_64-unknown-linux-musl.tar.gz" + tar -xzf /tmp/dragonfly-client.tar.gz --strip-components=1 -C /tmp/dragonfly-bin + rm /tmp/dragonfly-client.tar.gz + + - name: Upload Dragonfly Binaries + uses: actions/upload-artifact@v6 + with: + name: dragonfly-artifact + path: /tmp/dragonfly-bin + retention-days: 1 + + perftest-run: + name: Run perftest against Dragonfly + runs-on: ${{ vars.RUNNER_OS || 'ubuntu-latest' }} + needs: [build-image, dragonfly-download] + timeout-minutes: 30 + permissions: + contents: read + packages: read + env: + NYDUS_IMAGE: ${{ github.event.inputs.nydus_image || 'ghcr.io/dragonflyoss/image-service/nginx:nydus-latest' }} + PERFTEST_IMAGE: ${{ needs.build-image.outputs.image }} + # 
Optional. Base64("user:password") string forwarded into the perftest + # container so both `crane` (bootstrap fetch) and the nydusd registry + # backend can authenticate against private NYDUS_IMAGE registries. + REGISTRY_AUTH: ${{ secrets.PERFTEST_REGISTRY_AUTH }} + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Pull perftest image + run: | + docker pull "${PERFTEST_IMAGE}" + docker images | grep nydus-perftest + + - name: Download Dragonfly artifacts + uses: actions/download-artifact@v7 + with: + name: dragonfly-artifact + path: /usr/local/bin + + - name: Install Dragonfly binaries + run: | + sudo chmod +x /usr/local/bin/manager /usr/local/bin/scheduler /usr/local/bin/dfdaemon + + # ---------- Dragonfly control plane (mirrors e2e-dragonfly.yml) ---------- + - name: Start MySQL + run: | + docker run -d --name mysql \ + -e MYSQL_ROOT_PASSWORD=dragonfly \ + -e MYSQL_DATABASE=manager \ + -p 3306:3306 \ + mysql:8 + for i in $(seq 1 60); do + if docker exec mysql mysqladmin ping -h 127.0.0.1 -u root -pdragonfly --silent 2>/dev/null; then + echo "MySQL is ready"; break + fi + if [ "$i" -eq 60 ]; then echo "ERROR: MySQL failed"; docker logs mysql; exit 1; fi + sleep 2 + done + + - name: Start Redis + run: | + docker run -d --name redis -p 6379:6379 redis:latest + for i in $(seq 1 30); do + if docker exec redis redis-cli ping 2>/dev/null | grep -q PONG; then + echo "Redis is ready"; break + fi + if [ "$i" -eq 30 ]; then echo "ERROR: Redis failed"; docker logs redis; exit 1; fi + sleep 1 + done + + - name: Setup Dragonfly configs + run: | + sudo mkdir -p /etc/dragonfly + sudo cp misc/dragonfly/manager.yaml /etc/dragonfly/manager.yaml + sudo cp misc/dragonfly/scheduler.yaml /etc/dragonfly/scheduler.yaml + sudo cp misc/dragonfly/dfdaemon.yaml /etc/dragonfly/dfdaemon.yaml + sudo mkdir -p 
/tmp/dragonfly/logs /tmp/dragonfly/cache /tmp/dragonfly/storage + sudo chmod 777 /tmp/dragonfly/logs /tmp/dragonfly/cache /tmp/dragonfly/storage + mkdir -p /tmp/perftest-results + + - name: Start Manager + run: | + sudo nohup /usr/local/bin/manager --config /etc/dragonfly/manager.yaml \ + > /tmp/dragonfly/logs/manager.log 2>&1 & + for i in $(seq 1 60); do + if curl -fsS http://127.0.0.1:8080/healthy >/dev/null 2>&1; then + echo "Manager is ready"; break + fi + if [ "$i" -eq 60 ]; then + echo "ERROR: Manager failed"; sudo cat /tmp/dragonfly/logs/manager.log || true; exit 1 + fi + sleep 2 + done + + - name: Start Scheduler + run: | + sudo nohup /usr/local/bin/scheduler --config /etc/dragonfly/scheduler.yaml \ + > /tmp/dragonfly/logs/scheduler.log 2>&1 & + # Scheduler doesn't expose a friendly health endpoint here; just give it a moment + # and verify the gRPC port is listening. + for i in $(seq 1 30); do + if ss -tln 2>/dev/null | grep -q ':8002'; then + echo "Scheduler is listening on :8002"; break + fi + if [ "$i" -eq 30 ]; then + echo "ERROR: Scheduler failed"; sudo cat /tmp/dragonfly/logs/scheduler.log || true; exit 1 + fi + sleep 2 + done + + - name: Start dfdaemon + run: | + sudo nohup /usr/local/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml \ + > /tmp/dragonfly/logs/dfdaemon.log 2>&1 & + for i in $(seq 1 60); do + if ss -tln 2>/dev/null | grep -q ':4001'; then + echo "dfdaemon proxy is listening on :4001"; break + fi + if [ "$i" -eq 60 ]; then + echo "ERROR: dfdaemon failed"; sudo cat /tmp/dragonfly/logs/dfdaemon.log || true; exit 1 + fi + sleep 2 + done + + # ---------- Run the perftest container ---------- + - name: Run perftest container + run: | + mkdir -p /tmp/perftest-results + # Allow the container to reach the host's dfdaemon via host.docker.internal. 
+ docker run --rm \ + --name nydus-perftest-run \ + --add-host host.docker.internal:host-gateway \ + --privileged \ + --device /dev/fuse \ + --security-opt apparmor=unconfined \ + --security-opt seccomp=unconfined \ + -e NYDUS_IMAGE="${NYDUS_IMAGE}" \ + -e REGISTRY_AUTH="${REGISTRY_AUTH:-}" \ + -e DRAGONFLY_PROXY_URL="http://host.docker.internal:4001" \ + -e DRAGONFLY_SCHEDULER_ENDPOINT="http://host.docker.internal:8002" \ + -e READ_PARALLELISM=8 \ + -e MAX_FILES=200 \ + -e MOUNT_READY_TIMEOUT=120 \ + -e DIGEST_VALIDATE=true \ + -e STREAM_PREFETCH=true \ + -v /tmp/perftest-results:/results \ + "${PERFTEST_IMAGE}" + + - name: Show result.json + if: always() + run: | + if [ -f /tmp/perftest-results/result.json ]; then + echo "=== result.json ===" + cat /tmp/perftest-results/result.json + else + echo "ERROR: result.json was not produced." + ls -la /tmp/perftest-results || true + exit 1 + fi + + - name: Assert benchmark succeeded + run: | + # Require: workload exited 0, at least one file was read, and bytes > 0. 
+ jq -e ' + (.workload_rc // 1) == 0 + and (.workload.files_read // 0) > 0 + and (.workload.bytes_read // 0) > 0 + ' /tmp/perftest-results/result.json + + - name: Upload result.json + if: always() + uses: actions/upload-artifact@v6 + with: + name: perftest-result + path: /tmp/perftest-results/ + + - name: Dump service logs + if: always() + continue-on-error: true + run: | + mkdir -p /tmp/perftest-logs + sudo cp /tmp/dragonfly/logs/*.log /tmp/perftest-logs/ 2>/dev/null || true + sudo cp -r /var/log/dragonfly/dfdaemon/ /tmp/perftest-logs/dfdaemon/ 2>/dev/null || true + sudo cp -r /var/log/dragonfly/scheduler/ /tmp/perftest-logs/scheduler/ 2>/dev/null || true + sudo cp -r /var/log/dragonfly/manager/ /tmp/perftest-logs/manager/ 2>/dev/null || true + docker logs mysql > /tmp/perftest-logs/mysql.log 2>&1 || true + docker logs redis > /tmp/perftest-logs/redis.log 2>&1 || true + sudo chmod -R a+r /tmp/perftest-logs || true + + - name: Upload service logs + if: always() + uses: actions/upload-artifact@v6 + with: + name: perftest-service-logs + path: /tmp/perftest-logs/ + + - name: Cleanup + if: always() + continue-on-error: true + run: | + docker rm -f nydus-perftest-run mysql redis 2>/dev/null || true + for proc in dfdaemon scheduler manager; do + if pid=$(pgrep -n "${proc}" 2>/dev/null); then + sudo kill "${pid}" 2>/dev/null || true + fi + done diff --git a/Makefile b/Makefile index 6953b657b19..e3dede36826 100644 --- a/Makefile +++ b/Makefile @@ -209,3 +209,11 @@ nydus-overlayfs-lint: docker-static: docker build -t nydus-rs-static --build-arg RUST_TARGET=${RUST_TARGET_STATIC} misc/musl-static docker run --rm ${CARGO_BUILD_GEARS} -e RUST_TARGET=${RUST_TARGET_STATIC} --workdir /nydus-rs -v ${current_dir}:/nydus-rs nydus-rs-static + +# Build the perf-test image (Dragonfly proxy SDK mode). See misc/perftest/README.md. 
+PERFTEST_IMAGE ?= nydus-perftest:latest +perftest-image: + docker build -f misc/perftest/Dockerfile \ + --build-arg RUST_TARGET=${RUST_TARGET_STATIC} \ + -t ${PERFTEST_IMAGE} ${current_dir} +.PHONY: perftest-image diff --git a/misc/perftest/.dockerignore b/misc/perftest/.dockerignore new file mode 100644 index 00000000000..5fa644c21d0 --- /dev/null +++ b/misc/perftest/.dockerignore @@ -0,0 +1,5 @@ +target/ +**/target/ +.git/ +*.profraw +coverage/ diff --git a/misc/perftest/Dockerfile b/misc/perftest/Dockerfile new file mode 100644 index 00000000000..4d033e1c3fd --- /dev/null +++ b/misc/perftest/Dockerfile @@ -0,0 +1,129 @@ +# syntax=docker/dockerfile:1.6 +# +# Nydus performance test image for Dragonfly proxy SDK mode. +# +# This image bundles a statically-linked nydusd (musl) plus a small benchmark +# harness that mounts a Nydus image via FUSE and reads every regular file +# in parallel, exercising the Dragonfly SDK proxy path. Bind-mount a local +# nydusd over /usr/local/bin/nydusd to test a new daemon without rebuilding +# this image. Dragonfly (dfdaemon + scheduler) is expected to be deployed +# OUTSIDE the container; the proxy and scheduler endpoints are passed via +# environment variables. +# +# See README.md in this directory for usage. + +# ----------------------------------------------------------------------------- +# Stage 1: build a static nydusd (and nydusctl) on musl. +# +# `make static-release` honours the workspace defaults plus virtiofs on Linux. +# The `backend-dragonfly-proxy` feature is target-gated for x86_64/aarch64 in +# the root Cargo.toml, so it is automatically enabled on supported arches. +# ----------------------------------------------------------------------------- +ARG RUST_TARGET=x86_64-unknown-linux-musl +FROM clux/muslrust:1.84.0-stable AS builder + +ARG RUST_TARGET +ENV RUST_TARGET=${RUST_TARGET} + +# protoc is required by tonic / dragonfly-api build scripts. 
+RUN apt-get update \ + && apt-get install -y --no-install-recommends cmake protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /nydus-rs +COPY . /nydus-rs + +RUN rustup target add "${RUST_TARGET}" + +# Lean build: directly invoke cargo for the nydus binaries we ship, skipping +# fmt/clippy and contrib targets that `make static-release` would also run. +# The `backend-dragonfly-proxy` storage feature is target-gated for x86_64 / +# aarch64 in the workspace root Cargo.toml, so it is automatically enabled on +# the supported architectures. +RUN cargo build --release --target "${RUST_TARGET}" \ + --bin nydusd --bin nydusctl --features=virtiofs \ + && cp target/${RUST_TARGET}/release/nydusd /out-nydusd \ + && cp target/${RUST_TARGET}/release/nydusctl /out-nydusctl \ + && /out-nydusd --version + +# ----------------------------------------------------------------------------- +# Stage 2: Go builder. Produces a static `crane` for portable image manifest / +# blob access AND the static `workload` benchmark binary used by the harness. +# ----------------------------------------------------------------------------- +FROM golang:1.22-alpine AS go-builder +RUN apk add --no-cache git ca-certificates \ + && go install github.com/google/go-containerregistry/cmd/crane@v0.20.2 \ + && cp /go/bin/crane /out-crane + +WORKDIR /workload +COPY misc/perftest/workload/ /workload/ +RUN CGO_ENABLED=0 go build -trimpath -ldflags '-s -w' -o /out-workload ./... + +# ----------------------------------------------------------------------------- +# Stage 3: minimal runtime image. +# +# Ubuntu keeps the bundled static musl binaries working while also allowing +# /usr/local/bin/nydusd to be replaced by a normal glibc-linked nydusd +# bind-mounted from a local host build. The only runtime tooling we need beyond +# bash is fuse3 (for the FUSE mount), jq (for JSON munging), gettext-base +# (envsubst), curl + tar + ca-certificates (used by crane via TLS), and tini. 
+# ----------------------------------------------------------------------------- +FROM ubuntu:24.04 + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + bash \ + ca-certificates \ + coreutils \ + curl \ + file \ + fuse3 \ + gettext-base \ + iproute2 \ + jq \ + libgcc-s1 \ + tar \ + tini \ + util-linux \ + zlib1g \ + busybox \ + && rm -rf /var/lib/apt/lists/* \ + && mkdir -p /etc/nydus /mnt/nydus /results /var/lib/nydus/cache /tmp/nydus + +COPY --from=builder /out-nydusd /usr/local/bin/nydusd +COPY --from=builder /out-nydusctl /usr/local/bin/nydusctl +COPY --from=go-builder /out-crane /usr/local/bin/crane +COPY --from=go-builder /out-workload /usr/local/bin/workload + +COPY misc/perftest/entrypoint.sh /usr/local/bin/perftest-entrypoint +COPY misc/perftest/fetch-bootstrap.sh /usr/local/bin/fetch-bootstrap +COPY misc/perftest/config.template.json /etc/nydus/config.template.json + +RUN chmod +x /usr/local/bin/perftest-entrypoint \ + /usr/local/bin/fetch-bootstrap + +# Sensible defaults; all are overridable at `docker run` time. 
+ENV NYDUS_IMAGE="ghcr.io/dragonflyoss/image-service/nginx:nydus-latest" \ + DRAGONFLY_PROXY_URL="http://host.docker.internal:4001" \ + DRAGONFLY_SCHEDULER_ENDPOINT="http://host.docker.internal:8002" \ + REGISTRY_SCHEME="https" \ + ENABLE_DEFAULT_ROUTE_WORKAROUND="false" \ + DIGEST_VALIDATE="false" \ + STREAM_PREFETCH="false" \ + STREAM_PREFETCH_THREADS="5" \ + STREAM_PREFETCH_BANDWIDTH="0" \ + STREAM_PREFETCH_MAX_RETRY="10" \ + MOUNT_POINT="/mnt/nydus" \ + RESULTS_DIR="/results" \ + WORK_DIR="/tmp/nydus" \ + BLOB_CACHE_DIR="/var/lib/nydus/cache" \ + READ_PARALLELISM="16" \ + READ_CHUNK_SIZE="1048576" \ + MAX_FILES="0" \ + MOUNT_READY_TIMEOUT="60" \ + NYDUSD_LOG_LEVEL="info" \ + PLATFORM="linux/amd64" + +VOLUME ["/results"] +ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/perftest-entrypoint"] +CMD [] diff --git a/misc/perftest/README.md b/misc/perftest/README.md new file mode 100644 index 00000000000..d2316eb81bb --- /dev/null +++ b/misc/perftest/README.md @@ -0,0 +1,221 @@ +# nydus perftest image (Dragonfly proxy SDK mode) + +A self-contained container image that mounts a Nydus image via FUSE and +benchmarks cold-cache parallel reads through **Dragonfly proxy SDK mode**. + +Dragonfly itself (dfdaemon + scheduler) is expected to run **outside** this +container. The proxy and scheduler endpoints are passed in via environment +variables. The image bundles a static nydusd by default; bind-mount a locally +built nydusd over `/usr/local/bin/nydusd` when comparing daemon changes without +rebuilding the image. + +## What it measures + +The benchmark performs a single cold-cache pass: + +1. nydusd is started with the supplied config and a freshly-fetched bootstrap. +2. The harness waits until FUSE is mounted *and* `nydusctl info` reports + `state == RUNNING`. The elapsed time is recorded as **mount_ready_sec**. +3. A thread pool reads every regular file under the mountpoint in parallel + chunks. 
The harness records: + - `files_read`, `bytes_read`, `wall_clock_sec` + - `throughput_mbps = bytes_read / wall_clock` + - per-file read `latency_ms` (mean, p50, p90, p95, p99) +4. `nydusctl info` and `nydusctl metrics` (backend, blobcache, fs) are scraped + at the end and embedded in the JSON summary so you can confirm requests + actually went through the SDK proxy path. + +> **Cold-cache caveat.** Each container run starts with an empty local +> blobcache (`BLOB_CACHE_DIR`, default `/var/lib/nydus/cache`). However, the +> external dfdaemon's own cache persists across runs. For a true cold pass: +> either flush the dfdaemon cache between runs, or use a unique image per run. + +## Building + +From the repo root: + +```bash +make perftest-image # builds nydus-perftest:latest +# or directly: +docker build -f misc/perftest/Dockerfile -t nydus-perftest:latest . +``` + +The Dockerfile has three stages: + +1. A musl Rust builder (`clux/muslrust`) that runs + `cargo build --release --target $RUST_TARGET --bin nydusd --bin nydusctl --features=virtiofs`. + The `backend-dragonfly-proxy` storage feature is target-gated for x86_64 / + aarch64 in the workspace `Cargo.toml`, so it is automatically enabled on + supported arches. `protoc` + `cmake` are pre-installed for the tonic / + dragonfly-api build scripts. +2. A Go builder (`golang:1.22-alpine`) that produces a static `crane` binary + (for image manifest / bootstrap resolution) and a static `workload` binary + built from `misc/perftest/workload/`. +3. An Ubuntu runtime containing `nydusd`, `nydusctl`, `crane`, `workload`, + plus `bash`, `fuse3`, `jq`, `gettext-base`, `tar`, `tini`, and the + libraries needed by normal glibc-linked local nydusd builds. + +For arm64 hosts, set `--build-arg RUST_TARGET=aarch64-unknown-linux-musl` +(or `make perftest-image RUST_TARGET_STATIC=aarch64-unknown-linux-musl`). 
+ +## Running + +Minimum invocation against an external Dragonfly (dfdaemon listening on the +host at `:4001`, scheduler at `:8002`): + +```bash +docker run --rm \ + --cap-add SYS_ADMIN \ + --device /dev/fuse \ + --security-opt apparmor=unconfined \ + --security-opt seccomp=unconfined \ + --add-host host.docker.internal:host-gateway \ + -e NYDUS_IMAGE=ghcr.io/dragonflyoss/image-service/nginx:nydus-latest \ + -e DRAGONFLY_PROXY_URL=http://host.docker.internal:4001 \ + -e DRAGONFLY_SCHEDULER_ENDPOINT=http://host.docker.internal:8002 \ + -v "$PWD/results:/results" \ + nydus-perftest:latest +``` + +If FUSE inside the container fails on your host, fall back to `--privileged` +(rootless Docker / Podman often need this). The `--add-host` flag is only +needed on Linux to make `host.docker.internal` resolve to the host gateway. + +The summary is written to `./results/result.json` and printed to stderr. + +For Kubernetes, `misc/perftest/pod.yaml` provides a Pod example with every +perftest environment option listed explicitly. Update the image, Dragonfly +service endpoints, registry credentials, and optional local nydusd hostPath +before applying it. The example enables `hostNetwork` so registry access and +Dragonfly SDK local-IP discovery use the node network; remove it only if your +pod network provides a working default route. If the pod has endpoint-specific +routes but no default IPv4 route, set `ENABLE_DEFAULT_ROUTE_WORKAROUND=true` +to let the entrypoint add a default route derived from the Dragonfly or registry +route; this requires `CAP_NET_ADMIN` or a privileged container. + +## Configuration + +### Using a locally built nydusd + +By default the harness runs the bundled `/usr/local/bin/nydusd`. 
To test a +locally built daemon without rebuilding the perftest image, bind-mount it over +that path: + +```bash +docker run --rm \ + --privileged \ + --device /dev/fuse \ + --security-opt apparmor=unconfined \ + --security-opt seccomp=unconfined \ + --add-host host.docker.internal:host-gateway \ + -e NYDUS_IMAGE=ghcr.io/dragonflyoss/image-service/nginx:nydus-latest \ + -v "$PWD/target/release/nydusd:/usr/local/bin/nydusd:ro" \ + -v "$PWD/results:/results" \ + nydus-perftest:latest +``` + +The entrypoint validates `/usr/local/bin/nydusd` before fetching the bootstrap, +logs `nydusd --version`, and records the selected binary and version in +`result.json` and the printed summary. + +### Option A: bring your own nydusd config (recommended for real workloads) + +```bash +-v /path/to/nydusd.json:/etc/nydus/user.json:ro \ +-e NYDUSD_CONFIG=/etc/nydus/user.json +``` + +When `NYDUSD_CONFIG` is set and points to an existing file, the harness uses +it verbatim. You are responsible for setting `host`/`repo`/`proxy.url`/ +`proxy.dragonfly_scheduler_endpoint` correctly. You may also pre-supply a +bootstrap file with `-v ...:/path/bootstrap -e BOOTSTRAP_PATH=/path/bootstrap` +to skip the registry fetch. + +### Option B: render config from template + +When `NYDUSD_CONFIG` is unset, `config.template.json` is rendered with these +env vars (defaults shown): + +| Variable | Default | Notes | +|--------------------------------|--------------------------------------------------------------|-------| +| `NYDUS_IMAGE` | `ghcr.io/dragonflyoss/image-service/nginx:nydus-latest` | Full image ref. Parsed into REGISTRY_HOST/REPO. | +| `REGISTRY_HOST` | (parsed from NYDUS_IMAGE) | Override if the parser guesses wrong. | +| `REGISTRY_REPO` | (parsed from NYDUS_IMAGE) | | +| `REGISTRY_SCHEME` | `https` | | +| `REGISTRY_AUTH` | empty | base64(user:pass) for basic auth. 
| +| `REGISTRY_SKIP_VERIFY` | `false` | | +| `DRAGONFLY_PROXY_URL` | `http://host.docker.internal:4001` | dfdaemon proxy listen URL. | +| `DRAGONFLY_SCHEDULER_ENDPOINT` | `http://host.docker.internal:8002` | Non-empty value enables SDK mode. | +| `PROXY_FALLBACK` | `true` | Fall back to direct registry if proxy is unhealthy. | +| `ENABLE_DEFAULT_ROUTE_WORKAROUND` | `false` | Add a default IPv4 route derived from Dragonfly/registry endpoint routes when none exists. Requires `CAP_NET_ADMIN` or privileged mode. | +| `DIGEST_VALIDATE` | `false` | Enable RAFS metadata digest validation. | +| `BLOB_CACHE_DIR` | `/var/lib/nydus/cache` | nydusd blobcache work_dir. | +| `PREFETCH_ENABLE` | `false` | Background prefetch threads. | +| `PREFETCH_THREADS` | `8` | | +| `STREAM_PREFETCH` | `false` | Enable Dragonfly streaming blob prefetch. | +| `STREAM_PREFETCH_THREADS` | `5` | Concurrent streaming prefetch workers. | +| `STREAM_PREFETCH_BANDWIDTH` | `0` | Streaming prefetch bandwidth limit in bytes/sec; 0 uses nydusd default. | +| `STREAM_PREFETCH_MAX_RETRY` | `10` | Maximum retry attempts per blob for streaming prefetch. | + +### Workload knobs + +| Variable | Default | Notes | +|----------------------|---------|-------| +| `READ_PARALLELISM` | `16` | Concurrent file readers. | +| `READ_CHUNK_SIZE` | `1048576` | Bytes per `read()` call. | +| `MAX_FILES` | `0` | Cap files read; 0 = no cap. | +| `MOUNT_READY_TIMEOUT`| `60` | Seconds to wait for FUSE + RUNNING. | +| `NYDUSD_LOG_LEVEL` | `info` | trace/debug/info/warn/error. | +| `PLATFORM` | `linux/amd64` | OCI platform for multi-arch images. | +| `RESULTS_DIR` | `/results` | Where `result.json` is written. | + +### Bootstrap + +The bootstrap (image metadata) is required by nydusd. By default the harness +fetches it from `NYDUS_IMAGE` using `crane` (manifest -> bootstrap layer -> +untar to extract `image.boot`). To skip this step, mount a pre-extracted +bootstrap file and set `BOOTSTRAP_PATH=/path/to/bootstrap`. 
+ +## Output + +`$RESULTS_DIR/result.json` has the shape: + +```jsonc +{ + "image": "ghcr.io/.../nginx:nydus-latest", + "platform": "linux/amd64", + "bootstrap_path": "/tmp/nydus/bootstrap", + "config_path": "/tmp/nydus/nydusd.json", + "dragonfly": { "proxy_url": "...", "scheduler_endpoint": "...", "proxy_fallback": true }, + "timing_sec": { "mount_ready": 0.643, "workload": 12.518 }, + "workload": { "files_read": 1213, "bytes_read": 142860288, "throughput_mbps": 11.42, + "latency_ms": { "mean": 18.4, "p50": 9.1, "p90": 41.7, "p95": 63.2, "p99": 121.0 } }, + "workload_rc": 0, + "nydusd": { + "binary": "/usr/local/bin/nydusd", + "version": "Version: ...", + "info": { ... }, // nydusctl info + "backend": { ... }, // backend metrics: bytes pulled, request count + "blobcache": { ... }, // cache hit/miss + "fs": { ... } // fs-level counters + } +} +``` + +Inspect `nydusd.backend` to confirm requests actually went through the SDK +proxy path; the request counters there are what tells you the SDK was used. + +## Limitations / known gotchas + +- **External dfdaemon cache is not flushed by this image.** If you want + cold-from-Dragonfly results, drop the dfdaemon cache between runs or + rotate the test image. +- **Bootstrap fetch goes direct (not through the proxy).** This is setup, + not the measured path; the data-blob reads are what's actually being + benchmarked through the SDK. +- **Single cold-pass only.** No warm-cache or baseline modes — keep this + image focused. If you need them, run twice with different cache dirs. +- **FUSE in containers** is fragile: rootless runtimes, locked-down seccomp + profiles, or AppArmor policies can all block the mount. Use + `--privileged` if the documented `--cap-add SYS_ADMIN --device /dev/fuse` + combination doesn't work on your host. 
diff --git a/misc/perftest/config.template.json b/misc/perftest/config.template.json new file mode 100644 index 00000000000..bec44dc80b5 --- /dev/null +++ b/misc/perftest/config.template.json @@ -0,0 +1,46 @@ +{ + "device": { + "backend": { + "type": "registry", + "config": { + "scheme": "${REGISTRY_SCHEME}", + "host": "${REGISTRY_HOST}", + "repo": "${REGISTRY_REPO}", + "auth": "${REGISTRY_AUTH}", + "skip_verify": ${REGISTRY_SKIP_VERIFY}, + "timeout": 30, + "connect_timeout": 10, + "retry_limit": 2, + "proxy": { + "url": "${DRAGONFLY_PROXY_URL}", + "ping_url": "${DRAGONFLY_PROXY_URL}", + "fallback": ${PROXY_FALLBACK}, + "check_interval": 5, + "use_http": false, + "dragonfly_scheduler_endpoint": "${DRAGONFLY_SCHEDULER_ENDPOINT}" + } + } + }, + "cache": { + "type": "blobcache", + "compressed": false, + "config": { + "work_dir": "${BLOB_CACHE_DIR}" + } + } + }, + "mode": "direct", + "digest_validate": ${DIGEST_VALIDATE}, + "iostats_files": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": ${PREFETCH_ENABLE}, + "threads_count": ${PREFETCH_THREADS}, + "merging_size": 131072, + "bandwidth_rate": 0, + "stream_prefetch": ${STREAM_PREFETCH}, + "stream_prefetch_threads": ${STREAM_PREFETCH_THREADS}, + "stream_prefetch_bandwidth": ${STREAM_PREFETCH_BANDWIDTH}, + "stream_prefetch_max_retry": ${STREAM_PREFETCH_MAX_RETRY} + } +} diff --git a/misc/perftest/entrypoint.sh b/misc/perftest/entrypoint.sh new file mode 100755 index 00000000000..799fb220987 --- /dev/null +++ b/misc/perftest/entrypoint.sh @@ -0,0 +1,498 @@ +#!/usr/bin/env bash +# +# Nydus + Dragonfly SDK proxy perf-test orchestrator. +# +# Phases: +# 1. Resolve the nydusd config: +# - If $NYDUSD_CONFIG points to an existing file, use it as-is. +# - Otherwise, render /etc/nydus/config.template.json with envsubst. +# 2. Resolve the bootstrap: +# - If $BOOTSTRAP_PATH is set and exists, use it. +# - Otherwise, fetch from $NYDUS_IMAGE via crane (see fetch-bootstrap). +# 3. 
Start /usr/local/bin/nydusd in FUSE mode with --apisock for telemetry.
+#   4. Wait for FUSE to be mounted AND nydusd to report state RUNNING.
+#   5. Run the parallel-read workload over $MOUNT_POINT.
+#   6. Scrape nydusd metrics and emit a JSON summary to $RESULTS_DIR/result.json.
+#   7. Unmount and exit cleanly.
+set -euo pipefail
+
+log() { printf '[perftest] %s\n' "$*" >&2; }  # progress message on stderr
+die() { printf '[perftest] ERROR: %s\n' "$*" >&2; exit 1; }  # error message on stderr, then exit 1
+
+is_ipv4() {  # succeeds when $1 is shaped like a dotted quad; octet ranges are not checked
+    [[ "$1" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]]
+}
+
+endpoint_host() {  # print the host of a URL or host[:port] given as $1; status 1 when empty
+    local endpoint="$1"
+    local authority host
+
+    [ -n "${endpoint}" ] || return 1
+    authority="${endpoint#*://}"  # drop "scheme://" when present
+    authority="${authority%%/*}"  # drop the path
+    if [[ "${authority}" == \[*\]* ]]; then  # bracketed IPv6 literal: keep what is inside [...]
+        host="${authority#\[}"
+        host="${host%%\]*}"
+    else
+        host="${authority%%:*}"  # drop ":port"
+    fi
+    [ -n "${host}" ] || return 1
+    printf '%s\n' "${host}"
+}
+
+resolve_ipv4() {  # print one IPv4 address for host $1 (literal, then getent, then nslookup); status 1 if none
+    local host="$1"
+    local ip=""
+
+    if is_ipv4 "${host}"; then
+        printf '%s\n' "${host}"
+        return 0
+    fi
+
+    if command -v getent >/dev/null 2>&1; then
+        ip="$(getent ahostsv4 "${host}" 2>/dev/null | awk 'NR == 1 {print $1}' || true)"
+    fi
+    if [ -z "${ip}" ] && command -v nslookup >/dev/null 2>&1; then
+        ip="$(nslookup "${host}" 2>/dev/null | awk '
+            /^Address[[:space:]]+[0-9]+: / {print $3; exit}
+            /^Address: / && $2 !~ /:53$/ {print $2; exit}
+        ' || true)"
+    fi
+    [ -n "${ip}" ] || return 1
+    printf '%s\n' "${ip}"
+}
+
+ensure_default_ipv4_route() {  # best-effort: add a default IPv4 route when none exists; every explicit return is status 0
+    local endpoint host ip route gateway="" dev="" i
+    local -a fields
+
+    command -v ip >/dev/null 2>&1 || {
+        log "ip command not found; skipping default route workaround"
+        return 0
+    }
+    [ -z "$(ip -4 route show default 2>/dev/null)" ] || return 0  # a bare `return` would pass on the failed test's status 1 and trip `set -e` in the caller
+
+    log "No default IPv4 route found; trying to derive one from reachable endpoints"
+    for endpoint in "${DRAGONFLY_SCHEDULER_ENDPOINT}" "${DRAGONFLY_PROXY_URL}" "${REGISTRY_HOST:-}"; do
+        host="$(endpoint_host "${endpoint}" 2>/dev/null || true)"
+        [ -n "${host}" ] || continue
+        ip="$(resolve_ipv4 "${host}" 2>/dev/null || true)"
+        [ -n "${ip}" ] || {
+            log "Could not resolve IPv4 address for ${host}; skipping"
+            continue
+        }
+
+        route="$(ip -4 route get "${ip}" 2>/dev/null || true)"
+        route="${route%%$'\n'*}"  # keep only the first line of the reply
+        [ -n "${route}" ] || {
+            log "No IPv4 route to ${host} (${ip}); skipping"
+            continue
+        }
+
+        read -r -a fields <<< "${route}"  # split on whitespace without pathname expansion
+        gateway=""
+        dev=""
+        for ((i = 0; i < ${#fields[@]}; i++)); do
+            case "${fields[$i]}" in
+                via)
+                    gateway="${fields[$((i + 1))]:-}"
+                    ;;
+                dev)
+                    dev="${fields[$((i + 1))]:-}"
+                    ;;
+            esac
+        done
+
+        [ -n "${dev}" ] || {
+            log "Route to ${host} (${ip}) has no device: ${route}"
+            continue
+        }
+
+        if [ -n "${gateway}" ]; then
+            if ip route add default via "${gateway}" dev "${dev}" 2>/dev/null; then
+                log "Added default IPv4 route via ${gateway} dev ${dev} (derived from ${host}/${ip})"
+                return 0
+            fi
+        elif ip route add default dev "${dev}" 2>/dev/null; then
+            log "Added default IPv4 route dev ${dev} (derived from ${host}/${ip})"
+            return 0
+        fi
+        log "Failed to add default route from ${host}/${ip}: ${route}"
+    done
+
+    log "No default IPv4 route could be derived; Dragonfly SDK local IP discovery may fail"
+}
+
+# ---- Inputs ----------------------------------------------------------------
+NYDUS_IMAGE="${NYDUS_IMAGE:-}"
+NYDUSD_CONFIG="${NYDUSD_CONFIG:-}"
+BOOTSTRAP_PATH="${BOOTSTRAP_PATH:-}"
+MOUNT_POINT="${MOUNT_POINT:-/mnt/nydus}"
+RESULTS_DIR="${RESULTS_DIR:-/results}"
+WORK_DIR="${WORK_DIR:-/tmp/nydus}"
+BLOB_CACHE_DIR="${BLOB_CACHE_DIR:-/var/lib/nydus/cache}"
+READ_PARALLELISM="${READ_PARALLELISM:-16}"
+READ_CHUNK_SIZE="${READ_CHUNK_SIZE:-1048576}"
+MAX_FILES="${MAX_FILES:-0}"
+MOUNT_READY_TIMEOUT="${MOUNT_READY_TIMEOUT:-60}"
+NYDUSD_LOG_LEVEL="${NYDUSD_LOG_LEVEL:-info}"
+PLATFORM="${PLATFORM:-linux/amd64}"
+
+DRAGONFLY_PROXY_URL="${DRAGONFLY_PROXY_URL:-http://host.docker.internal:4001}"
+DRAGONFLY_SCHEDULER_ENDPOINT="${DRAGONFLY_SCHEDULER_ENDPOINT:-http://host.docker.internal:8002}"
+REGISTRY_SCHEME="${REGISTRY_SCHEME:-https}"
+REGISTRY_AUTH="${REGISTRY_AUTH:-}"
# Remaining tunables: assign the default only when the variable is unset or
# empty in the environment.
: "${REGISTRY_SKIP_VERIFY:=false}"
: "${PROXY_FALLBACK:=true}"
: "${ENABLE_DEFAULT_ROUTE_WORKAROUND:=false}"
: "${DIGEST_VALIDATE:=false}"
: "${PREFETCH_ENABLE:=false}"
: "${PREFETCH_THREADS:=8}"
: "${STREAM_PREFETCH:=false}"
: "${STREAM_PREFETCH_THREADS:=5}"
: "${STREAM_PREFETCH_BANDWIDTH:=0}"
: "${STREAM_PREFETCH_MAX_RETRY:=10}"
# The daemon path is fixed, not configurable.
NYDUSD="/usr/local/bin/nydusd"

# Create every directory the run writes into or mounts onto.
mkdir -p "${WORK_DIR}" "${RESULTS_DIR}" "${BLOB_CACHE_DIR}" "${MOUNT_POINT}"

# Fail early when the daemon binary is missing or cannot be executed, and
# keep its version banner (newlines folded to spaces) for the log.
[ -x "${NYDUSD}" ] || die "nydusd is not executable: ${NYDUSD}"
NYDUSD_VERSION="$("${NYDUSD}" --version 2>&1 | tr '\n' ' ')" \
  || die "failed to execute ${NYDUSD}: ${NYDUSD_VERSION}"
log "Using nydusd binary: ${NYDUSD} (${NYDUSD_VERSION})"

# If REGISTRY_AUTH is provided (base64 of "user:password"), materialise a
# docker config.json so `crane` (used by fetch-bootstrap) can authenticate
# against private registries. The nydusd registry backend already picks up
# REGISTRY_AUTH via the rendered config below.
# image_repo_ref IMAGE: print IMAGE without its "@digest" and ":tag" parts.
# A ':' counts as a tag separator only when it occurs in the last path
# component; earlier it belongs to a registry port, e.g. "localhost:5000/app"
# is printed unchanged.
image_repo_ref() {
  local ref="${1%@*}"

  if [[ "${ref##*/}" == *:* ]]; then
    ref="${ref%:*}"
  fi
  printf '%s\n' "${ref}"
}

if [ -n "${REGISTRY_AUTH}" ]; then
  # Credentials are keyed by REGISTRY_HOST when given, else by the registry
  # host parsed from NYDUS_IMAGE, else by docker.io.
  auth_host="${REGISTRY_HOST:-}"
  if [ -z "${auth_host}" ] && [ -n "${NYDUS_IMAGE}" ]; then
    ref="$(image_repo_ref "${NYDUS_IMAGE}")"
    if [[ "${ref}" == */* ]]; then
      first="${ref%%/*}"
      if [[ "${first}" == *.* || "${first}" == *:* || "${first}" == "localhost" ]]; then
        auth_host="${first}"
      fi
    fi
    auth_host="${auth_host:-docker.io}"
  fi
  export DOCKER_CONFIG="${WORK_DIR}/.docker"
  mkdir -p "${DOCKER_CONFIG}"
  # Create the file under a restrictive umask so the credentials are never
  # readable by other users, not even before the chmod below.
  (
    umask 077
    jq -n --arg host "${auth_host}" --arg auth "${REGISTRY_AUTH}" \
      '{auths: {($host): {auth: $auth}}}' \
      > "${DOCKER_CONFIG}/config.json"
  )
  # Still needed when config.json already existed with wider permissions.
  chmod 600 "${DOCKER_CONFIG}/config.json"
  log "Wrote registry credentials for ${auth_host} to ${DOCKER_CONFIG}/config.json"
fi

APISOCK="${WORK_DIR}/api.sock"
NYDUSD_LOG="${WORK_DIR}/nydusd.log"
RESULT_JSON="${RESULTS_DIR}/result.json"

# ---- Phase 1: resolve config ----------------------------------------------
if [ -n "${NYDUSD_CONFIG}" ] && [ -f "${NYDUSD_CONFIG}" ]; then
  CONFIG_PATH="${NYDUSD_CONFIG}"
  log "Using user-supplied nydusd config: ${CONFIG_PATH}"
else
  if [ -n "${NYDUSD_CONFIG}" ]; then
    # Keep the fallback, but do not drop a user-supplied path silently.
    log "WARNING: NYDUSD_CONFIG=${NYDUSD_CONFIG} is not a regular file; rendering config from template instead"
  fi
  [ -n "${NYDUS_IMAGE}" ] || die "either NYDUSD_CONFIG or NYDUS_IMAGE must be set"

  # Parse NYDUS_IMAGE into REGISTRY_HOST and REGISTRY_REPO. The first path
  # segment is the host iff it contains '.' or ':' or equals 'localhost';
  # otherwise we default to docker.io with the 'library/' prefix when only
  # a single name segment is present (matching docker's reference parser).
  # Explicitly set REGISTRY_HOST / REGISTRY_REPO always win.
  REF="$(image_repo_ref "${NYDUS_IMAGE}")"
  if [[ "${REF}" == */* ]]; then
    first="${REF%%/*}"; rest="${REF#*/}"
    if [[ "${first}" == *.* || "${first}" == *:* || "${first}" == "localhost" ]]; then
      REGISTRY_HOST="${REGISTRY_HOST:-${first}}"
      REGISTRY_REPO="${REGISTRY_REPO:-${rest}}"
    else
      REGISTRY_HOST="${REGISTRY_HOST:-docker.io}"
      REGISTRY_REPO="${REGISTRY_REPO:-${REF}}"
    fi
  else
    REGISTRY_HOST="${REGISTRY_HOST:-docker.io}"
    REGISTRY_REPO="${REGISTRY_REPO:-library/${REF}}"
  fi
  # envsubst only sees exported variables, so export everything exported here
  # for the template.
  export REGISTRY_HOST REGISTRY_REPO REGISTRY_SCHEME REGISTRY_AUTH \
    REGISTRY_SKIP_VERIFY PROXY_FALLBACK \
    DRAGONFLY_PROXY_URL DRAGONFLY_SCHEDULER_ENDPOINT \
    BLOB_CACHE_DIR DIGEST_VALIDATE PREFETCH_ENABLE PREFETCH_THREADS \
    STREAM_PREFETCH STREAM_PREFETCH_THREADS STREAM_PREFETCH_BANDWIDTH \
    STREAM_PREFETCH_MAX_RETRY

  CONFIG_PATH="${WORK_DIR}/nydusd.json"
  envsubst < /etc/nydus/config.template.json > "${CONFIG_PATH}"
  log "Rendered config -> ${CONFIG_PATH}"
  log "  registry: ${REGISTRY_SCHEME}://${REGISTRY_HOST}/${REGISTRY_REPO}"
  log "  proxy: ${DRAGONFLY_PROXY_URL}  scheduler: ${DRAGONFLY_SCHEDULER_ENDPOINT}"
fi
if [ "${ENABLE_DEFAULT_ROUTE_WORKAROUND}" = "true" ]; then
  ensure_default_ipv4_route
else
  log "Default IPv4 route workaround disabled"
fi

# ---- Phase 2: resolve bootstrap -------------------------------------------
if [ -n "${BOOTSTRAP_PATH}" ] && [ -f "${BOOTSTRAP_PATH}" ]; then
  log "Using user-supplied bootstrap: ${BOOTSTRAP_PATH}"
else
  if [ -n "${BOOTSTRAP_PATH}" ]; then
    log "WARNING: BOOTSTRAP_PATH=${BOOTSTRAP_PATH} is not a regular file; fetching bootstrap from the image instead"
  fi
  [ -n "${NYDUS_IMAGE}" ] || die "BOOTSTRAP_PATH not set and NYDUS_IMAGE empty; cannot fetch bootstrap"
  log "Fetching bootstrap from ${NYDUS_IMAGE} (platform=${PLATFORM})"
  # The path of the extracted bootstrap is captured from fetch-bootstrap's
  # stdout.
  BOOTSTRAP_PATH="$(NYDUS_IMAGE="${NYDUS_IMAGE}" PLATFORM="${PLATFORM}" \
    WORK_DIR="${WORK_DIR}" /usr/local/bin/fetch-bootstrap)"
fi

# ---- Phase 3: start nydusd -------------------------------------------------
log "Starting nydusd: binary=${NYDUSD} bootstrap=${BOOTSTRAP_PATH} mountpoint=${MOUNT_POINT}"
+T_DAEMON_START=$(date +%s.%N) + +"${NYDUSD}" \ + --config "${CONFIG_PATH}" \ + --bootstrap "${BOOTSTRAP_PATH}" \ + --mountpoint "${MOUNT_POINT}" \ + --apisock "${APISOCK}" \ + --log-level "${NYDUSD_LOG_LEVEL}" \ + > "${NYDUSD_LOG}" 2>&1 & +NYDUSD_PID=$! + +cleanup() { + rc=$? + log "Cleanup (rc=${rc})" + if mountpoint -q "${MOUNT_POINT}" 2>/dev/null; then + umount "${MOUNT_POINT}" 2>/dev/null || umount -l "${MOUNT_POINT}" 2>/dev/null || true + fi + if kill -0 "${NYDUSD_PID}" 2>/dev/null; then + kill "${NYDUSD_PID}" 2>/dev/null || true + wait "${NYDUSD_PID}" 2>/dev/null || true + fi + if [ "${rc}" -ne 0 ] && [ -f "${NYDUSD_LOG}" ]; then + log "--- nydusd.log (tail) ---" + tail -n 80 "${NYDUSD_LOG}" >&2 || true + fi +} +trap cleanup EXIT + +# ---- Phase 4: wait for readiness ------------------------------------------ +log "Waiting up to ${MOUNT_READY_TIMEOUT}s for FUSE mount and daemon RUNNING state..." +T_MOUNT_READY="" +deadline=$(( $(date +%s) + MOUNT_READY_TIMEOUT )) +while [ "$(date +%s)" -lt "${deadline}" ]; do + if ! 
kill -0 "${NYDUSD_PID}" 2>/dev/null; then + die "nydusd exited prematurely (see ${NYDUSD_LOG})" + fi + if mountpoint -q "${MOUNT_POINT}" && [ -S "${APISOCK}" ]; then + state=$(nydusctl --sock "${APISOCK}" --raw info 2>/dev/null \ + | jq -r '.state // ""' 2>/dev/null || true) + if [ "${state}" = "RUNNING" ] || [ "${state}" = "Running" ]; then + T_MOUNT_READY=$(date +%s.%N); break + fi + fi + sleep 0.2 +done +[ -n "${T_MOUNT_READY}" ] || die "timed out waiting for nydusd to become RUNNING" + +MOUNT_READY_SEC=$(awk -v a="${T_MOUNT_READY}" -v b="${T_DAEMON_START}" 'BEGIN{printf "%.3f", a-b}') +log "Mount ready in ${MOUNT_READY_SEC}s" + +# ---- Phase 5: workload ----------------------------------------------------- +log "Running workload (parallelism=${READ_PARALLELISM}, chunk=${READ_CHUNK_SIZE} bytes, max_files=${MAX_FILES})" +WORKLOAD_OUT="${WORK_DIR}/workload.json" +T_WORKLOAD_START=$(date +%s.%N) +set +e +workload \ + --root "${MOUNT_POINT}" \ + --parallelism "${READ_PARALLELISM}" \ + --chunk-size "${READ_CHUNK_SIZE}" \ + --max-files "${MAX_FILES}" \ + --output "${WORKLOAD_OUT}" +WORKLOAD_RC=$? +set -e +T_WORKLOAD_END=$(date +%s.%N) +WORKLOAD_SEC=$(awk -v a="${T_WORKLOAD_END}" -v b="${T_WORKLOAD_START}" 'BEGIN{printf "%.3f", a-b}') +log "Workload finished in ${WORKLOAD_SEC}s (rc=${WORKLOAD_RC})" + +# ---- Phase 6: scrape metrics + emit summary ------------------------------- +# Each scrape must produce valid JSON for jq's --slurpfile to work. +scrape() { + local out + out=$(nydusctl --sock "${APISOCK}" --raw "$@" 2>/dev/null) || out="" + if [ -z "${out}" ] || ! printf '%s' "${out}" | jq -e . 
>/dev/null 2>&1; then + echo "{}" + else + printf '%s' "${out}" + fi +} +echo "$(scrape info)" > "${WORK_DIR}/info.json" +echo "$(scrape metrics backend)" > "${WORK_DIR}/backend.json" +echo "$(scrape metrics cache)" > "${WORK_DIR}/cache.json" +echo "$(scrape metrics fsstats)" > "${WORK_DIR}/fsstats.json" + +[ -f "${WORKLOAD_OUT}" ] || echo '{}' > "${WORKLOAD_OUT}" + +jq -n \ + --arg image "${NYDUS_IMAGE:-}" \ + --arg platform "${PLATFORM}" \ + --arg config_path "${CONFIG_PATH}" \ + --arg bootstrap_path "${BOOTSTRAP_PATH}" \ + --arg nydusd_bin "${NYDUSD}" \ + --arg nydusd_version "${NYDUSD_VERSION}" \ + --arg proxy_url "${DRAGONFLY_PROXY_URL}" \ + --arg scheduler "${DRAGONFLY_SCHEDULER_ENDPOINT}" \ + --argjson proxy_fb "$([ "${PROXY_FALLBACK}" = "true" ] && echo true || echo false)" \ + --argjson mount_ready "${MOUNT_READY_SEC}" \ + --argjson workload_sec "${WORKLOAD_SEC}" \ + --argjson workload_rc "${WORKLOAD_RC}" \ + --slurpfile workload "${WORKLOAD_OUT}" \ + --slurpfile info "${WORK_DIR}/info.json" \ + --slurpfile backend "${WORK_DIR}/backend.json" \ + --slurpfile blobcache "${WORK_DIR}/cache.json" \ + --slurpfile fs "${WORK_DIR}/fsstats.json" \ + '{ + image: $image, + platform: $platform, + config_path: $config_path, + bootstrap_path: $bootstrap_path, + dragonfly: { proxy_url: $proxy_url, scheduler_endpoint: $scheduler, proxy_fallback: $proxy_fb }, + timing_sec: { mount_ready: $mount_ready, workload: $workload_sec }, + workload_rc: $workload_rc, + workload: ($workload[0] // {}), + nydusd: { + binary: $nydusd_bin, + version: $nydusd_version, + info: ($info[0] // {}), + backend: ($backend[0] // {}), + blobcache: ($blobcache[0] // {}), + fs: ($fs[0] // {}) + } + }' > "${RESULT_JSON}" + +log "Wrote summary to ${RESULT_JSON}" +echo "================ PERF TEST SUMMARY ================" >&2 +jq -r ' + "image : \(.image)", + "nydusd_binary : \(.nydusd.binary)", + "nydusd_version : \(.nydusd.version)", + "mount_ready_sec : \(.timing_sec.mount_ready)", + "workload_sec : 
\(.timing_sec.workload)", + "files_read : \(.workload.files_read // 0) (skipped=\(.workload.files_skipped // 0), errors=\(.workload.files_errored // 0))", + "bytes_read : \(.workload.bytes_read // 0)", + "throughput_MBps : \(.workload.throughput_mbps // 0)", + "latency_ms p50/p95/p99 : \(.workload.latency_ms.p50 // 0) / \(.workload.latency_ms.p95 // 0) / \(.workload.latency_ms.p99 // 0)", + "--- blob cache ---", + "cache_hit_ratio : \( + if (.nydusd.blobcache.total // 0) > 0 then + ((.nydusd.blobcache.partial_hits // 0) + (.nydusd.blobcache.whole_hits // 0)) + / .nydusd.blobcache.total * 100 + | . * 10 | round / 10 | tostring + "%" + else "n/a (no cache reads)" + end + ) (partial=\(.nydusd.blobcache.partial_hits // 0), whole=\(.nydusd.blobcache.whole_hits // 0), total=\(.nydusd.blobcache.total // 0))", + "cache_entries : \(.nydusd.blobcache.entries_count // 0) chunks", + "--- prefetch ---", + "prefetch_data_MB : \( + (.nydusd.blobcache.prefetch_data_amount // 0) as $pd | + (.nydusd.backend.read_amount_total // 0) as $net | + ($pd / 1000000 | . * 100 | round / 100 | tostring) + + if $net > 0 and $pd > 0 then + " (\($pd / $net * 100 | . * 10 | round / 10)% of backend traffic)" + else "" + end + )", + "prefetch_avg_merge_KB : \( + (.nydusd.blobcache.prefetch_requests_count // 0) as $rc | + if $rc > 0 then + (.nydusd.blobcache.prefetch_data_amount // 0) / $rc / 1000 + | . * 10 | round / 10 + else "n/a" + end + ) (requests=\(.nydusd.blobcache.prefetch_requests_count // 0), unmerged_chunks=\(.nydusd.blobcache.prefetch_unmerged_chunks // 0))", + "prefetch_avg_latency_ms : \( + (.nydusd.blobcache.prefetch_requests_count // 0) as $rc | + if $rc > 0 then + (.nydusd.blobcache.prefetch_cumulative_time_millis // 0) / $rc + | . 
* 10 | round / 10 + else "n/a" + end + )", + "prefetch_bandwidth_MBps : \( + ((.nydusd.blobcache.prefetch_end_time_secs // 0) + + (.nydusd.blobcache.prefetch_end_time_millis // 0) / 1000) as $end | + ((.nydusd.blobcache.prefetch_begin_time_secs // 0) + + (.nydusd.blobcache.prefetch_begin_time_millis // 0) / 1000) as $begin | + ($end - $begin) as $dur | + (.nydusd.blobcache.prefetch_data_amount // 0) as $pd | + if $dur > 0 and $pd > 0 then + $pd / 1000000 / $dur | . * 100 | round / 100 + else "n/a" + end + )", + "--- io interaction ---", + "io_breakdown (prefetch/ondemand/total): \( + (.nydusd.blobcache.prefetch_data_amount // 0) as $pd | + (.nydusd.backend.read_amount_total // 0) as $net | + ($net - $pd) as $od | + ($pd / 1000000 | . * 100 | round / 100 | tostring) + " MB / " + + ($od / 1000000 | . * 100 | round / 100 | tostring) + " MB / " + + ($net / 1000000 | . * 100 | round / 100 | tostring) + " MB" + + if $net > 0 then + " (prefetch_share=\($pd / $net * 100 | . * 10 | round / 10)%)" + else "" + end + )", + "ondemand_backend_reads : \( + (.nydusd.backend.read_count // 0) as $total_reads | + (.nydusd.blobcache.prefetch_requests_count // 0) as $prefetch_reads | + [($total_reads - $prefetch_reads), 0] | max as $od_reads | + ($od_reads | tostring) + + if $total_reads > 0 then + " (\($od_reads / $total_reads * 100 | . * 10 | round / 10)% of total backend ops triggered by cache misses)" + else "" + end + )", + "prefetch_data_ready: \(.nydusd.blobcache.data_all_ready // false)", + "--- backend ---", + "backend_io_size p50/p95/p99 : \( + (.nydusd.backend.read_count_block_size_dist // [0,0,0,0,0,0,0,0]) as $d | + ($d | add // 0) as $n | + if $n == 0 then "n/a (no backend reads)" + else + (reduce range(8) as $i ({cum: [], s: 0}; .s += $d[$i] | .cum += [.s])) | + .cum as $c | + (["<1K","1-4K","4-16K","16-64K","64-128K","128-512K","512K-1M",">=1M"]) as $lbl | + (($n * 0.50) as $t | first(range(8) | select($c[.] 
>= $t))) as $p50 | + (($n * 0.95) as $t | first(range(8) | select($c[.] >= $t))) as $p95 | + (($n * 0.99) as $t | first(range(8) | select($c[.] >= $t))) as $p99 | + "\($lbl[$p50]) / \($lbl[$p95]) / \($lbl[$p99])" + end + ) (total_backend_reads=\((.nydusd.backend.read_count // 0)))", + "backend_avg_latency_ms : \( + (.nydusd.backend.read_count // 0) as $rc | + if $rc > 0 then + (.nydusd.backend.read_cumulative_latency_millis_total // 0) / $rc + | . * 10 | round / 10 + else "n/a (no backend reads)" + end + )", + "backend_fetch_MB : \( + (.nydusd.backend.read_amount_total // 0) / 1000000 + | . * 100 | round / 100 + ) (errors=\(.nydusd.backend.read_errors // 0))", + "network_efficiency: \( + (.nydusd.backend.read_amount_total // 0) as $net | + (.nydusd.fs.data_read // 0) as $app | + if $net > 0 then + ($app / $net | . * 100 | round / 100 | tostring) + "x (app_bytes=\($app), backend_bytes=\($net))" + else "n/a (no backend reads)" + end + )", + "workload_rc : \(.workload_rc)" +' "${RESULT_JSON}" >&2 +echo "===================================================" >&2 + +exit "${WORKLOAD_RC}" diff --git a/misc/perftest/fetch-bootstrap.sh b/misc/perftest/fetch-bootstrap.sh new file mode 100755 index 00000000000..6936143178e --- /dev/null +++ b/misc/perftest/fetch-bootstrap.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# +# Resolve and extract the Nydus bootstrap (image.boot) for a Nydus image. +# +# Inputs (env): +# NYDUS_IMAGE full image reference, e.g. ghcr.io/foo/bar:nydus-latest +# PLATFORM OCI platform selector for index manifests (e.g. linux/amd64) +# WORK_DIR scratch directory +# +# Output: +# On success, writes the bootstrap blob to "$WORK_DIR/bootstrap" and prints +# that path to stdout. +# +# Notes: +# The bootstrap is identified by the layer annotation +# containerd.io/snapshot/nydus-bootstrap=true +# and falls back to "the first non-blob nydus layer" if the annotation is +# missing (matches the convention used in the repo's e2e workflow). 
+# The blob is a tar.gz containing image.boot (or *.boot) which is what +# nydusd actually consumes. +set -euo pipefail + +: "${NYDUS_IMAGE:?NYDUS_IMAGE must be set}" +: "${WORK_DIR:?WORK_DIR must be set}" +PLATFORM="${PLATFORM:-linux/amd64}" + +mkdir -p "${WORK_DIR}" +MANIFEST="${WORK_DIR}/manifest.json" +LAYER_TGZ="${WORK_DIR}/bootstrap-layer.tar.gz" +EXTRACT_DIR="${WORK_DIR}/bootstrap-extract" +OUT="${WORK_DIR}/bootstrap" + +echo "[fetch-bootstrap] image=${NYDUS_IMAGE} platform=${PLATFORM}" >&2 + +crane manifest --platform "${PLATFORM}" "${NYDUS_IMAGE}" > "${MANIFEST}" + +# Identify the bootstrap layer. Prefer the explicit annotation, then fall +# back to the first layer whose mediaType mentions "nydus" but not "blob" +# (matches the convention used in this repo's e2e workflow). +BOOTSTRAP_DIGEST=$(jq -r ' + (.layers[]? + | select(.annotations["containerd.io/snapshot/nydus-bootstrap"] == "true") + | .digest) // ( + .layers[]? + | select((.mediaType // "") | (contains("nydus") and (contains("blob") | not))) + | .digest) +' "${MANIFEST}" | head -n1) + +if [ -z "${BOOTSTRAP_DIGEST}" ] || [ "${BOOTSTRAP_DIGEST}" = "null" ]; then + echo "[fetch-bootstrap] ERROR: no bootstrap layer in manifest" >&2 + cat "${MANIFEST}" >&2 + exit 1 +fi + +echo "[fetch-bootstrap] bootstrap layer digest=${BOOTSTRAP_DIGEST}" >&2 +crane blob "${NYDUS_IMAGE}@${BOOTSTRAP_DIGEST}" > "${LAYER_TGZ}" + +rm -rf "${EXTRACT_DIR}" +mkdir -p "${EXTRACT_DIR}" +if ! 
tar -xf "${LAYER_TGZ}" -C "${EXTRACT_DIR}" 2>/dev/null; then + echo "[fetch-bootstrap] ERROR: failed to untar bootstrap layer" >&2 + file "${LAYER_TGZ}" >&2 || true + exit 1 +fi + +BOOTSTRAP_FILE=$(find "${EXTRACT_DIR}" \( -name 'image.boot' -o -name '*.boot' \) -type f | head -1) +if [ -z "${BOOTSTRAP_FILE}" ]; then + echo "[fetch-bootstrap] ERROR: no .boot file in extracted layer" >&2 + find "${EXTRACT_DIR}" >&2 + exit 1 +fi + +cp "${BOOTSTRAP_FILE}" "${OUT}" +echo "[fetch-bootstrap] wrote ${OUT} ($(stat -c%s "${OUT}") bytes)" >&2 +echo "${OUT}" diff --git a/misc/perftest/pod.yaml b/misc/perftest/pod.yaml new file mode 100644 index 00000000000..505021df59b --- /dev/null +++ b/misc/perftest/pod.yaml @@ -0,0 +1,130 @@ +apiVersion: v1 +kind: Pod +metadata: + name: nydus-perftest + labels: + app: nydus-perftest +spec: + # The perftest needs outbound registry access and Dragonfly SDK mode needs a + # discoverable local IP address. hostNetwork avoids CNI/network-policy setups + # that leave this privileged benchmark pod without a default route. + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + restartPolicy: Never + containers: + - name: perftest + image: nydus-perftest:latest + imagePullPolicy: IfNotPresent + securityContext: + privileged: true + env: + # Image/bootstrap/config selection. + - name: NYDUS_IMAGE + value: "ghcr.io/dragonflyoss/image-service/nginx:nydus-latest" + - name: NYDUSD_CONFIG + value: "" + - name: BOOTSTRAP_PATH + value: "" + - name: PLATFORM + value: "linux/amd64" + + # Registry and Dragonfly proxy config used when NYDUSD_CONFIG is empty. 
+ - name: REGISTRY_HOST + value: "" + - name: REGISTRY_REPO + value: "" + - name: REGISTRY_SCHEME + value: "https" + - name: REGISTRY_AUTH + value: "" + - name: REGISTRY_SKIP_VERIFY + value: "false" + - name: DRAGONFLY_PROXY_URL + value: "http://dragonfly-dfdaemon.dragonfly-system.svc.cluster.local:4001" + - name: DRAGONFLY_SCHEDULER_ENDPOINT + value: "http://dragonfly-scheduler.dragonfly-system.svc.cluster.local:8002" + - name: PROXY_FALLBACK + value: "true" + - name: ENABLE_DEFAULT_ROUTE_WORKAROUND + value: "false" + - name: DIGEST_VALIDATE + value: "false" + - name: PREFETCH_ENABLE + value: "false" + - name: PREFETCH_THREADS + value: "8" + - name: STREAM_PREFETCH + value: "false" + - name: STREAM_PREFETCH_THREADS + value: "5" + - name: STREAM_PREFETCH_BANDWIDTH + value: "0" + - name: STREAM_PREFETCH_MAX_RETRY + value: "10" + + # Runtime paths. + - name: MOUNT_POINT + value: "/mnt/nydus" + - name: RESULTS_DIR + value: "/results" + - name: WORK_DIR + value: "/tmp/nydus" + - name: BLOB_CACHE_DIR + value: "/var/lib/nydus/cache" + + # Workload knobs. 
+ - name: READ_PARALLELISM + value: "16" + - name: READ_CHUNK_SIZE + value: "1048576" + - name: MAX_FILES + value: "0" + - name: MOUNT_READY_TIMEOUT + value: "60" + - name: NYDUSD_LOG_LEVEL + value: "info" + volumeMounts: + - name: dev-fuse + mountPath: /dev/fuse + - name: results + mountPath: /results + - name: cache + mountPath: /var/lib/nydus/cache + - name: work + mountPath: /tmp/nydus + # To test a locally built daemon, mount it over the bundled binary: + # - name: local-nydusd + # mountPath: /usr/local/bin/nydusd + # readOnly: true + # If NYDUSD_CONFIG or BOOTSTRAP_PATH is set, mount the referenced file: + # - name: nydusd-config + # mountPath: /etc/nydus/user.json + # subPath: user.json + # readOnly: true + # - name: bootstrap + # mountPath: /bootstrap/image.boot + # subPath: image.boot + # readOnly: true + volumes: + - name: dev-fuse + hostPath: + path: /dev/fuse + type: CharDevice + - name: results + emptyDir: {} + - name: cache + emptyDir: {} + - name: work + emptyDir: {} + # To test a locally built daemon, uncomment this hostPath volume and the + # matching mount above. The file must exist on the node that runs the pod. 
+ # - name: local-nydusd + # hostPath: + # path: /path/on/node/target/release/nydusd + # type: File + # - name: nydusd-config + # configMap: + # name: nydus-perftest-config + # - name: bootstrap + # configMap: + # name: nydus-perftest-bootstrap diff --git a/misc/perftest/workload/go.mod b/misc/perftest/workload/go.mod new file mode 100644 index 00000000000..d2111232cf8 --- /dev/null +++ b/misc/perftest/workload/go.mod @@ -0,0 +1,3 @@ +module github.com/dragonflyoss/nydus/misc/perftest/workload + +go 1.22 diff --git a/misc/perftest/workload/main.go b/misc/perftest/workload/main.go new file mode 100644 index 00000000000..5383eefbed2 --- /dev/null +++ b/misc/perftest/workload/main.go @@ -0,0 +1,232 @@ +// workload runs a cold-cache parallel-read benchmark over a mounted Nydus +// filesystem and writes a JSON summary describing throughput and latency. +// +// It is the data-plane half of the nydus perftest image; the orchestrator +// (entrypoint.sh) is responsible for mounting nydusd before invoking this +// program. We deliberately use stdlib only so the binary can be built +// statically with `CGO_ENABLED=0 go build`. 
+package main + +import ( + "encoding/json" + "flag" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "sort" + "sync" + "sync/atomic" + "time" +) + +type latencyStats struct { + Mean float64 `json:"mean"` + P50 float64 `json:"p50"` + P90 float64 `json:"p90"` + P95 float64 `json:"p95"` + P99 float64 `json:"p99"` +} + +type summary struct { + FilesSeen int `json:"files_seen"` + FilesRead int `json:"files_read"` + FilesSkipped int `json:"files_skipped"` + FilesErrored int `json:"files_errored"` + BytesRead int64 `json:"bytes_read"` + WallClockSec float64 `json:"wall_clock_sec"` + ThroughputMBps float64 `json:"throughput_mbps"` + Parallelism int `json:"parallelism"` + ChunkSize int `json:"chunk_size"` + LatencyMs latencyStats `json:"latency_ms"` + ErrorSamples []string `json:"error_samples"` +} + +func main() { + root := flag.String("root", "", "directory to walk and read") + parallelism := flag.Int("parallelism", 16, "concurrent file readers") + chunkSize := flag.Int("chunk-size", 1<<20, "bytes per read() call") + maxFiles := flag.Int("max-files", 0, "cap files read; 0 = no cap") + output := flag.String("output", "", "summary JSON output path") + flag.Parse() + if *root == "" || *output == "" { + fmt.Fprintln(os.Stderr, "usage: workload --root DIR --output PATH [--parallelism N] [--chunk-size N] [--max-files N]") + os.Exit(2) + } + if *parallelism < 1 { + *parallelism = 1 + } + + files, skipped, err := collectFiles(*root, *maxFiles) + if err != nil { + fmt.Fprintf(os.Stderr, "[workload] walk error: %v\n", err) + os.Exit(1) + } + fmt.Fprintf(os.Stderr, "[workload] %d files to read (%d non-regular skipped)\n", len(files), skipped) + + var ( + bytesRead int64 + errored int64 + latencies = make([]float64, 0, len(files)) + latMu sync.Mutex + errSamples []string + errSamplesMu sync.Mutex + work = make(chan string, len(files)) + wg sync.WaitGroup + ) + for _, p := range files { + work <- p + } + close(work) + + tStart := time.Now() + for w := 0; w < *parallelism; w++ { + 
wg.Add(1) + go func() { + defer wg.Done() + localBuf := make([]byte, *chunkSize) + for path := range work { + n, lat, rerr := readOne(path, localBuf) + if rerr != nil { + atomic.AddInt64(&errored, 1) + errSamplesMu.Lock() + if len(errSamples) < 10 { + errSamples = append(errSamples, fmt.Sprintf("%s: %v", path, rerr)) + } + errSamplesMu.Unlock() + continue + } + atomic.AddInt64(&bytesRead, n) + latMu.Lock() + latencies = append(latencies, lat.Seconds()) + latMu.Unlock() + } + }() + } + wg.Wait() + elapsed := time.Since(tStart).Seconds() + + s := summary{ + FilesSeen: len(files) + skipped, + FilesRead: len(files) - int(errored), + FilesSkipped: skipped, + FilesErrored: int(errored), + BytesRead: bytesRead, + WallClockSec: round(elapsed, 6), + Parallelism: *parallelism, + ChunkSize: *chunkSize, + ErrorSamples: errSamples, + } + if elapsed > 0 { + s.ThroughputMBps = round(float64(bytesRead)/1_000_000.0/elapsed, 3) + } + s.LatencyMs = computeLatency(latencies) + + out, err := os.Create(*output) + if err != nil { + fmt.Fprintf(os.Stderr, "[workload] cannot write %s: %v\n", *output, err) + os.Exit(1) + } + enc := json.NewEncoder(out) + enc.SetIndent("", " ") + if err := enc.Encode(&s); err != nil { + fmt.Fprintf(os.Stderr, "[workload] encode error: %v\n", err) + os.Exit(1) + } + out.Close() + + fmt.Fprintf(os.Stderr, + "[workload] done: %d files, %d bytes, %.2f MB/s, p95=%.2fms\n", + s.FilesRead, s.BytesRead, s.ThroughputMBps, s.LatencyMs.P95) + + if errored > 0 && s.FilesRead == 0 { + os.Exit(1) + } +} + +func collectFiles(root string, maxFiles int) ([]string, int, error) { + var files []string + skipped := 0 + err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { + if err != nil { + skipped++ + return nil + } + if d.IsDir() { + return nil + } + if !d.Type().IsRegular() { + skipped++ + return nil + } + files = append(files, path) + if maxFiles > 0 && len(files) >= maxFiles { + return filepath.SkipAll + } + return nil + }) + return files, 
skipped, err +} + +func readOne(path string, buf []byte) (int64, time.Duration, error) { + t0 := time.Now() + f, err := os.Open(path) + if err != nil { + return 0, time.Since(t0), err + } + defer f.Close() + var total int64 + for { + n, rerr := f.Read(buf) + total += int64(n) + if rerr == io.EOF { + break + } + if rerr != nil { + return total, time.Since(t0), rerr + } + } + return total, time.Since(t0), nil +} + +func computeLatency(secs []float64) latencyStats { + if len(secs) == 0 { + return latencyStats{} + } + sorted := make([]float64, len(secs)) + copy(sorted, secs) + sort.Float64s(sorted) + var sum float64 + for _, v := range secs { + sum += v + } + pick := func(p float64) float64 { + idx := int(p * float64(len(sorted)-1)) + if idx < 0 { + idx = 0 + } + if idx >= len(sorted) { + idx = len(sorted) - 1 + } + return sorted[idx] * 1000.0 + } + return latencyStats{ + Mean: round(sum/float64(len(secs))*1000.0, 3), + P50: round(pick(0.50), 3), + P90: round(pick(0.90), 3), + P95: round(pick(0.95), 3), + P99: round(pick(0.99), 3), + } +} + +func round(v float64, places int) float64 { + scale := 1.0 + for i := 0; i < places; i++ { + scale *= 10 + } + if v >= 0 { + return float64(int64(v*scale+0.5)) / scale + } + return float64(int64(v*scale-0.5)) / scale +} diff --git a/rafs/src/prefetch.rs b/rafs/src/prefetch.rs index 15cd5f7582d..150066c2ddf 100644 --- a/rafs/src/prefetch.rs +++ b/rafs/src/prefetch.rs @@ -14,13 +14,14 @@ use std::collections::BTreeMap; use std::io::Read; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::{Arc, Condvar, Mutex, Once}; use std::thread; -use std::time::{Duration, Instant}; +use std::time::{Duration, Instant, SystemTime}; use nydus_storage::backend::RequestSource; use nydus_storage::cache::BlobCache; use nydus_storage::device::{BlobChunkInfo, BlobInfo}; +use nydus_utils::metrics::Metric; use crate::metadata::{RafsInodeExt, RafsSuper}; @@ -110,6 +111,7 @@ struct State { 
threads_count: usize, rate_limiter: Option>>, max_retry_per_blob: u64, + begin_timing_once: Once, } /// Streaming blob prefetcher that downloads entire blobs via rangeless GET @@ -149,6 +151,7 @@ impl BlobPrefetcher { threads_count, rate_limiter, max_retry_per_blob: DEFAULT_MAX_RETRY, + begin_timing_once: Once::new(), }), }) } @@ -445,6 +448,20 @@ impl BlobPrefetcher { chunk_status: &mut [bool], ) -> anyhow::Result<()> { let blob_id = blob.info.blob_id(); + let stream_start = SystemTime::now(); + + // Record the wall-clock start of prefetch on the first blob streamed. + if let Some(metrics) = cache.metrics() { + state.begin_timing_once.call_once(|| { + if let Ok(t) = stream_start.duration_since(SystemTime::UNIX_EPOCH) { + metrics.prefetch_begin_time_secs.set(t.as_secs()); + metrics + .prefetch_begin_time_millis + .set(t.subsec_millis() as u64); + } + }); + } + let last_chunk_end = blob .chunks .iter() @@ -565,6 +582,18 @@ impl BlobPrefetcher { "BlobPrefetcher: streamed blob {}, cached {} chunks", blob_id, chunks_cached ); + + // Update BlobcacheMetrics to include this blob in the prefetch statistics. + // One blob stream = one backend request (rangeless GET), regardless of how + // many chunks were extracted from it. prefetch_data_amount is accumulated + // per chunk in cache_chunk_data; here we count the request and update timing. + if chunks_cached > 0 { + if let Some(metrics) = cache.metrics() { + metrics.prefetch_requests_count.inc(); + metrics.calculate_prefetch_metrics(stream_start); + } + } + Ok(()) } } @@ -582,7 +611,7 @@ mod tests { }; use nydus_storage::{StorageError, StorageResult}; use nydus_utils::crypt::{Cipher, CipherContext}; - use nydus_utils::metrics::BackendMetrics; + use nydus_utils::metrics::{BackendMetrics, BlobcacheMetrics, Metric}; use nydus_utils::{compress, crypt, digest}; use crate::mock::MockChunkInfo; @@ -650,6 +679,7 @@ mod tests { /// Whether `cache_chunk_data` returns `Ok(true)` (true) or `Err` (false). 
cache_succeeds: bool, cache_calls: Arc, + blobcache_metrics: Arc, } impl BlobCache for MockBlobCache { @@ -719,10 +749,13 @@ mod tests { fn cache_chunk_data( &self, _chunk: &dyn BlobChunkInfo, - _data: &[u8], + data: &[u8], ) -> std::io::Result { self.cache_calls.fetch_add(1, Ordering::Relaxed); if self.cache_succeeds { + self.blobcache_metrics + .prefetch_data_amount + .add(data.len() as u64); Ok(true) } else { Err(std::io::Error::new( @@ -731,6 +764,10 @@ mod tests { )) } } + + fn metrics(&self) -> Option> { + Some(self.blobcache_metrics.clone()) + } } // ── LimitedReader ───────────────────────────────────────────────────────── @@ -762,6 +799,7 @@ mod tests { threads_count: 2, rate_limiter: None, max_retry_per_blob: 3, + begin_timing_once: Once::new(), }) } @@ -785,11 +823,13 @@ mod tests { let stream_calls = Arc::new(AtomicUsize::new(0)); let cache_calls = Arc::new(AtomicUsize::new(0)); let reader = Arc::new(MockBlobReader::new(stream_data, Arc::clone(&stream_calls))); + let blobcache_metrics = BlobcacheMetrics::new("mock", "/tmp"); let cache = Arc::new(MockBlobCache { chunk_map: Arc::new(MockChunkMap { ready }), reader, cache_succeeds, cache_calls: Arc::clone(&cache_calls), + blobcache_metrics, }); (cache, stream_calls, cache_calls) } @@ -1088,6 +1128,250 @@ mod tests { // ── prefetch_one_blob ──────────────────────────────────────────────────── + // ── BlobcacheMetrics instrumentation ───────────────────────────────────── + + #[test] + fn test_metrics_prefetch_data_amount_updated_per_cached_chunk() { + // Each newly cached chunk must add its compressed size to prefetch_data_amount. 
+ let state = make_state(false); + let chunk_size = 20usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data: Vec = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_data_amount.count(), + chunk_size as u64, + "prefetch_data_amount must equal the compressed chunk size" + ); + } + + #[test] + fn test_metrics_prefetch_data_amount_accumulates_across_chunks() { + // With two chunks cached, prefetch_data_amount must be the sum of both sizes. + let state = make_state(false); + let sz0 = 8usize; + let sz1 = 12usize; + let chunk0 = Arc::new(MockChunkInfo::mock(0, 0, sz0 as u32, 0, sz0 as u32)); + let chunk1 = Arc::new(MockChunkInfo::mock(0, 8, sz1 as u32, 0, sz1 as u32)); + let blob = make_blob_work(vec![chunk0, chunk1]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 2]; + let data: Vec = vec![0u8; sz0 + sz1]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_data_amount.count(), + (sz0 + sz1) as u64, + "prefetch_data_amount must be the sum of all cached chunk sizes" + ); + } + + #[test] + fn test_metrics_prefetch_data_amount_not_updated_on_cache_error() { + // When cache_chunk_data returns Err, prefetch_data_amount must stay zero. 
+ let state = make_state(false); + let chunk_size = 10usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], false); // cache_succeeds = false + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_data_amount.count(), + 0, + "prefetch_data_amount must not be updated when caching fails" + ); + } + + #[test] + fn test_metrics_prefetch_requests_count_incremented_per_blob() { + // After streaming one blob with at least one newly cached chunk, + // prefetch_requests_count must be exactly 1. + let state = make_state(false); + let chunk_size = 10usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_requests_count.count(), + 1, + "exactly one blob stream = one prefetch request" + ); + } + + #[test] + fn test_metrics_prefetch_requests_count_zero_when_no_chunks_cached() { + // If nothing gets cached (empty stream), prefetch_requests_count must stay 0. 
+ let state = make_state(false); + let chunk = Arc::new(MockChunkInfo::mock(0, 0, 10, 0, 10)); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); // empty stream data + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let reader: Box = Box::new(std::io::Cursor::new(vec![])); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert_eq!( + metrics.prefetch_requests_count.count(), + 0, + "no chunks cached → no prefetch request counted" + ); + } + + #[test] + fn test_metrics_prefetch_begin_time_set_on_first_blob() { + // The prefetch_begin_time_secs must be non-zero after streaming the first blob. + let state = make_state(false); + let chunk_size = 10usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert!( + metrics.prefetch_begin_time_secs.count() > 0, + "prefetch_begin_time_secs must be set after the first blob stream" + ); + } + + #[test] + fn test_metrics_prefetch_begin_time_set_only_once() { + // Calling stream_and_cache twice on the same state must set begin_time only + // on the first call (the Once cell prevents overwriting). 
+ let state = make_state(false); + let chunk_size = 10usize; + let make_chunk = || { + Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )) + }; + let blob1 = make_blob_work(vec![make_chunk()]); + let blob2 = make_blob_work(vec![make_chunk()]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + + let reader1: Box = Box::new(std::io::Cursor::new(vec![0u8; chunk_size])); + let mut status1 = vec![false; 1]; + BlobPrefetcher::stream_and_cache(&state, reader1, &blob1, &cache_arc, 0, &mut status1) + .unwrap(); + + let first_begin = metrics.prefetch_begin_time_secs.count(); + + // Small delay to ensure a different timestamp would be written if Once didn't guard it. + std::thread::sleep(Duration::from_millis(10)); + + let reader2: Box = Box::new(std::io::Cursor::new(vec![0u8; chunk_size])); + let mut status2 = vec![false; 1]; + BlobPrefetcher::stream_and_cache(&state, reader2, &blob2, &cache_arc, 0, &mut status2) + .unwrap(); + + assert_eq!( + metrics.prefetch_begin_time_secs.count(), + first_begin, + "begin time must not be overwritten on subsequent blob streams" + ); + } + + #[test] + fn test_metrics_prefetch_end_time_set_after_streaming() { + // prefetch_end_time_secs must be non-zero after a blob is streamed. 
+ let state = make_state(false); + let chunk_size = 10usize; + let chunk = Arc::new(MockChunkInfo::mock( + 0, + 0, + chunk_size as u32, + 0, + chunk_size as u32, + )); + let blob = make_blob_work(vec![chunk]); + let (cache, _, _) = make_cache(false, vec![], true); + let metrics = cache.blobcache_metrics.clone(); + let cache_arc: Arc = cache; + let mut status = vec![false; 1]; + let data = vec![0u8; chunk_size]; + let reader: Box = Box::new(std::io::Cursor::new(data)); + + BlobPrefetcher::stream_and_cache(&state, reader, &blob, &cache_arc, 0, &mut status) + .unwrap(); + + assert!( + metrics.prefetch_end_time_secs.count() > 0, + "prefetch_end_time_secs must be set after streaming" + ); + } + + // ── prefetch_one_blob ──────────────────────────────────────────────────── + #[test] fn test_prefetch_one_blob_all_chunks_ready() { // When every chunk is already cached (is_ready = true), no stream_read diff --git a/storage/src/cache/cachedfile.rs b/storage/src/cache/cachedfile.rs index fa4af42e5e1..c7e45baf1a2 100644 --- a/storage/src/cache/cachedfile.rs +++ b/storage/src/cache/cachedfile.rs @@ -557,6 +557,10 @@ impl BlobCache for FileCacheEntry { &self.blob_id } + fn metrics(&self) -> Option> { + Some(self.metrics.clone()) + } + fn blob_uncompressed_size(&self) -> Result { Ok(self.blob_uncompressed_size) } @@ -877,6 +881,11 @@ impl BlobCache for FileCacheEntry { })(); self.update_chunk_pending_status(chunk, result.is_ok()); + if result.is_ok() { + self.metrics + .prefetch_data_amount + .add(compressed_data.len() as u64); + } result.map(|_| true) } } diff --git a/storage/src/cache/mod.rs b/storage/src/cache/mod.rs index 5918cc82872..e6945a1f998 100644 --- a/storage/src/cache/mod.rs +++ b/storage/src/cache/mod.rs @@ -24,6 +24,7 @@ use std::time::Instant; use fuse_backend_rs::file_buf::FileVolatileSlice; use nydus_utils::compress::zlib_random::ZranDecoder; use nydus_utils::crypt::{self, Cipher, CipherContext}; +use nydus_utils::metrics::BlobcacheMetrics; use 
nydus_utils::{compress, digest}; use crate::backend::{BlobBackend, BlobReader, RequestSource}; @@ -429,6 +430,12 @@ pub trait BlobCache: Send + Sync { fn get_blob_meta_info(&self) -> Result>> { Ok(None) } + + /// Get the [BlobcacheMetrics](../../nydus_utils/metrics/struct.BlobcacheMetrics.html) for this + /// cache object. Returns `None` for cache implementations that do not track metrics. + fn metrics(&self) -> Option> { + None + } } /// An iterator to enumerate decompressed data for chunks.