diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0dc56632c..b04d4805d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -293,19 +293,6 @@ jobs: run: | docker run --rm -t ${{ github.repository_owner }}/machine-emulator:tests cartesi-machine-tests --concurrency=update_hash_tree:1 --test="^rv64ui.*$" --jobs=$(nproc) run_host_and_uarch - - name: Create uarch json logs to be used to test the Solidity based microarchitecture interpreter - run: | - docker run --name uarch-logs -w /tmp -t ${{ github.repository_owner }}/machine-emulator:tests /usr/share/cartesi-machine/tests/scripts/collect-uarch-test-logs.sh - docker cp uarch-logs:/tmp/uarch-riscv-tests-json-logs.tar.gz . - docker rm uarch-logs - - - name: Upload uarch json logs to be used to test the Solidity based microarchitecture interpreter - uses: actions/upload-artifact@v4 - with: - name: uarch-logs - path: uarch-riscv-tests-json-logs.tar.gz - compression-level: 0 - - name: Build machine-emulator "tests" docker image uses: docker/build-push-action@v5 with: @@ -760,7 +747,6 @@ jobs: artifacts/machine-emulator_*.deb artifacts/uarch-ram.bin add-generated-files.diff - uarch-logs/uarch-riscv-tests-json-logs.tar.gz tests-amd64/machine-emulator-tests-data.deb tests-amd64/machine-emulator-tests_*.deb tests-arm64/machine-emulator-tests_*.deb @@ -845,13 +831,19 @@ jobs: - name: Simple boot inside the docker image run: docker run --rm -t ${{ github.repository_owner }}/machine-emulator:tests /usr/bin/cartesi-machine /bin/true - - name: Create step log test files + - name: Generate risc0 fixtures run: | - mkdir -p /tmp/cartesi-machine/tests/data/step-logs - chmod -R 777 /tmp/cartesi-machine/tests/data - docker run --rm -t -v /tmp/cartesi-machine/tests/data:/tmp/cartesi-machine/tests/data ${{ github.repository_owner }}/machine-emulator:tests /usr/bin/cartesi-machine-tests --hash-function=sha256 --save-step-logs=/tmp/cartesi-machine/tests/data/step-logs run_step - docker run 
--rm -t -v /tmp/cartesi-machine/tests/data:/tmp/cartesi-machine/tests/data ${{ github.repository_owner }}/machine-emulator:tests /usr/share/cartesi-machine/tests/lua/create-step-logs.lua - ls -l /tmp/cartesi-machine/tests/data/step-logs + mkdir -p risc0/test/fixtures/cartesi-machine-tests risc0/test/fixtures/one-mcycle + chmod -R 777 risc0/test/fixtures + docker run --rm -t -v ${{ github.workspace }}/risc0/test/fixtures:/fixtures ${{ github.repository_owner }}/machine-emulator:tests \ + cartesi-machine-tests --jobs=$(nproc) --hash-function=sha256 --save-step-logs=/fixtures/cartesi-machine-tests run_step + docker run --rm -t -v ${{ github.workspace }}/risc0/test/fixtures:/fixtures ${{ github.repository_owner }}/machine-emulator:tests \ + /usr/share/cartesi-machine/tests/lua/record-one-mcycle.lua --output-dir=/fixtures/one-mcycle + # Adversarial logs the guest must reject (test_reject_fixtures.rs). Mirrors the third + # recorder in risc0/Makefile's fixtures target; tampers the positive logs above. + docker run --rm -t -v ${{ github.workspace }}/risc0/test/fixtures:/fixtures ${{ github.repository_owner }}/machine-emulator:tests \ + /usr/share/cartesi-machine/tests/lua/record-adversarial-machine.lua --fixtures-dir=/fixtures --output-dir=/fixtures/reject-machine + ls -lR risc0/test/fixtures - name: Install Rust toolchain uses: actions-rs/toolchain@v1 @@ -859,6 +851,9 @@ jobs: toolchain: stable override: true + - name: Check Rust formatting + run: make -C risc0 check-format + - name: Install rzup run: | cargo install --git https://github.com/risc0/risc0 rzup @@ -875,18 +870,17 @@ jobs: run: sudo apt-get update && sudo apt-get install -y lua5.4 - name: Install Foundry + # Pinned to the same forge version as the solidity-step job (single version across the + # workflow; the version that generated the committed transpiled .sol). 
uses: foundry-rs/foundry-toolchain@v1 + with: + version: v1.0.0 - name: Build risc0 prover/verifier run: make risc0 - - name: Copy step log fixture for RISC0 tests - run: | - mkdir -p risc0/test/fixtures - cp /tmp/cartesi-machine/tests/data/step-logs/step-0.log risc0/test/fixtures/step.log - - name: Run risc0 tests - run: CARTESI_STEP_LOGS_PATH=/tmp/cartesi-machine/tests/data/step-logs make test-risc0 + run: CARTESI_STEP_LOGS_PATH=${{ github.workspace }}/risc0/test/fixtures/cartesi-machine-tests make test-risc0 - name: Export RISC0 artifacts run: make -C risc0 export-artifacts @@ -899,3 +893,160 @@ jobs: risc0/artifacts/cartesi-risc0-guest-step-prover.bin risc0/artifacts/cartesi-risc0-guest-step-prover-image-id.txt risc0/artifacts/ImageID.sol + + solidity-step: + name: Solidity Step + needs: [check-format, check-commits] + runs-on: ubuntu-latest-8-cores + steps: + - name: Checkout machine emulator source code + uses: actions/checkout@v4 + with: + submodules: recursive + + # The transpiler drift check and its golden tests need only lua + lpeg + forge (no built + # emulator), so they run first and fail fast before the expensive image build below. + - name: Install lua5.4 and lpeg + run: | + sudo apt-get update && sudo apt-get install -y lua5.4 lua-lpeg + lua5.4 -e 'require("lpeg")' + + - name: Install Foundry + # Pinned: forge fmt output must match the version that generated the committed transpiled + # .sol files (the check-gen-transpiled drift check below), the same reason we pin clang-format. 
+ uses: foundry-rs/foundry-toolchain@v1 + with: + version: v1.0.0 + + - name: Check transpiled Solidity is in sync with the C++ sources + run: make -C solidity-step check-gen-transpiled + + - name: Transpiler golden tests + run: make -C solidity-step test-transpile + + - name: Setup variables + run: echo MACHINE_EMULATOR_VERSION=`make version` >> $GITHUB_ENV + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Depot CLI + uses: depot/setup-action@v1 + + - name: Build machine-emulator "builder" docker image + uses: depot/build-push-action@v1 + with: + file: Dockerfile + context: . + target: builder + platforms: linux/amd64 + tags: ${{ github.repository_owner }}/machine-emulator:builder + push: false + load: true + build-args: | + DEBUG=${{ (startsWith(github.ref, 'refs/tags/v') && 'no' || 'yes') }} + GIT_COMMIT=${GITHUB_SHA} + MACHINE_EMULATOR_VERSION=${{ env.MACHINE_EMULATOR_VERSION }} + project: ${{ vars.DEPOT_PROJECT }} + token: ${{ secrets.DEPOT_TOKEN }} + + # tests/Dockerfile's runtime stage is FROM cartesi/machine-emulator:devel, so build it too. + - name: Build machine-emulator docker image + uses: depot/build-push-action@v1 + with: + file: Dockerfile + context: . + platforms: linux/amd64 + tags: ${{ github.repository_owner }}/machine-emulator:devel + push: false + load: true + build-args: | + DEBUG=${{ (startsWith(github.ref, 'refs/tags/v') && 'no' || 'yes') }} + GIT_COMMIT=${GITHUB_SHA} + MACHINE_EMULATOR_VERSION=${{ env.MACHINE_EMULATOR_VERSION }} + project: ${{ vars.DEPOT_PROJECT }} + token: ${{ secrets.DEPOT_TOKEN }} + + - name: Build machine-emulator "tests" docker image + uses: docker/build-push-action@v5 + with: + file: tests/Dockerfile + context: . 
+ platforms: linux/amd64 + tags: ${{ github.repository_owner }}/machine-emulator:tests + push: false + load: true + build-args: | + DEBUG=${{ (startsWith(github.ref, 'refs/tags/v') && 'no' || 'yes') }} + MACHINE_EMULATOR_VERSION=${{ env.MACHINE_EMULATOR_VERSION }} + + # check-gen-constants reads the live cartesi Lua module, so the generator runs INSIDE the tests + # image (no built emulator on the runner); forge fmt + the diff run on the runner. -t is omitted + # so the container's stdout redirects to the file uncorrupted, and forge fmt runs from + # solidity-step/ so it picks up that project's [fmt] config (matching how the file was generated). + - name: Check emulator constants are in sync with the C++ headers + run: | + docker run --rm -v ${{ github.workspace }}:/work:ro -w /work ${{ github.repository_owner }}/machine-emulator:tests \ + lua5.4 solidity-step/tools/gen-emulator-constants.lua > /tmp/EmulatorConstants.gen.sol + cd solidity-step + forge fmt /tmp/EmulatorConstants.gen.sol + diff -u src/EmulatorConstants.sol /tmp/EmulatorConstants.gen.sol \ + || { echo "ERROR: src/EmulatorConstants.sol is stale. Run 'make -C solidity-step gen-constants' and commit."; exit 1; } + + - name: Generate solidity-step fixtures + run: | + FIX=solidity-step/test/fixtures + mkdir -p $FIX/uarch-tests $FIX/uarch-tests-per-cycle $FIX/send-cmio-response $FIX/reset-uarch + chmod -R 777 $FIX + IMG=${{ github.repository_owner }}/machine-emulator:tests + # uarch-riscv-tests is the installed wrapper (it bakes CARTESI_TESTS_UARCH_PATH); the + # record-*.lua recorders are run directly (executable, with a lua5.4 shebang). Mirrors + # solidity-step/Makefile's fixtures target; adversarial recorders tamper the positive sets. 
+ docker run --rm -t -v ${{ github.workspace }}/$FIX:/fixtures $IMG \ + uarch-riscv-tests --test='rv64ui%-uarch%-.+%.bin' --output-dir=/fixtures/uarch-tests record_uarch_tests + docker run --rm -t -v ${{ github.workspace }}/$FIX:/fixtures $IMG \ + uarch-riscv-tests --test='rv64ui%-uarch%-.+%.bin' --per-cycle-logs --output-dir=/fixtures/uarch-tests-per-cycle record_uarch_tests + docker run --rm -t -v ${{ github.workspace }}/$FIX:/fixtures $IMG \ + /usr/share/cartesi-machine/tests/lua/record-send-cmio-response.lua --output-dir=/fixtures/send-cmio-response + docker run --rm -t -v ${{ github.workspace }}/$FIX:/fixtures $IMG \ + /usr/share/cartesi-machine/tests/lua/record-reset-uarch.lua --output-dir=/fixtures/reset-uarch + docker run --rm -t -v ${{ github.workspace }}/$FIX:/fixtures $IMG \ + /usr/share/cartesi-machine/tests/lua/record-adversarial-uarch.lua --fixtures-dir=/fixtures --output-dir=/fixtures/reject-uarch + docker run --rm -t -v ${{ github.workspace }}/$FIX:/fixtures $IMG \ + /usr/share/cartesi-machine/tests/lua/record-adversarial-send-cmio-response.lua --fixtures-dir=/fixtures --output-dir=/fixtures/reject-send-cmio-response + ls -lR $FIX + + - name: Run solidity-step tests + run: make -C solidity-step test + + # Coverage is informational and forge's --ir-minimum pass is slow, so it never gates the job. + - name: Solidity-step coverage report + continue-on-error: true + run: | + sudo apt-get update && sudo apt-get install -y lcov + cd solidity-step + forge coverage --ir-minimum --report summary --report lcov | tee coverage-summary.txt + genhtml lcov.info --output-directory coverage-html --title "solidity-step coverage" --quiet || true + + - name: Upload solidity-step coverage + continue-on-error: true + uses: actions/upload-artifact@v4 + with: + name: solidity-step-coverage + # coverage-html/ is the browsable report; coverage-summary.txt is the quick text table; + # lcov.info is the raw tracefile for tooling. 
+ path: | + solidity-step/coverage-html + solidity-step/coverage-summary.txt + solidity-step/lcov.info + if-no-files-found: warn diff --git a/.gitignore b/.gitignore index e377ade0d..e541c4b98 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,10 @@ build tests/fuzz/seed-corpus tests/fuzz/corpus +tests/fuzz/fuzz-config +tests/fuzz/fuzz-interpret +tests/fuzz/fuzz-interpret-step +tests/fuzz/*.dSYM pkg third-party/downloads src/cartesi-jsonrpc-machine diff --git a/README.md b/README.md index da21c2563..f5e2b60b0 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ brew install cartesi-machine - Boost >= 1.83 - Lua >= 5.4.6 (optional, required for scripting support and interactive terminal) - Libslirp >= 4.6.0 (optional, required for networking support) +- luaposix (optional, required for running the test suite; install into the same Lua tree, e.g. `luarocks install luaposix`) - Rust and RISC Zero toolchain (optional, required for building the RISC Zero prover) ###### Debian Requirements @@ -155,11 +156,25 @@ rzup install r0vm Then build with `make risc0`. -###### Foundry Requirements (optional, for Solidity tests only) +###### Foundry Requirements (optional, for the Solidity step library) + +The in-tree `solidity-step/` library replays binary step logs on-chain for fraud-proof verification. +It is optional and independent of the core build: the main `make` build needs neither Foundry nor Rust.
To +build and test it you'll need [Foundry](https://getfoundry.sh/), pinned to the version CI uses +(`forge fmt` output is version-specific, and the transpiled Solidity is drift-checked against it): ```sh curl -L https://foundry.paradigm.xyz | bash -foundryup +foundryup --install v1.0.0 +``` + +The library has its own `solidity-step/` Makefile; drive it directly (recording fixtures needs a +built emulator and its uarch test programs): + +```sh +make -C solidity-step build # compile the contracts +make -C solidity-step fixtures # record step-log fixtures from the emulator +make -C solidity-step test # forge fmt --check + forge test ``` #### Build diff --git a/risc0/Makefile b/risc0/Makefile index 00ba67313..469c73a69 100644 --- a/risc0/Makefile +++ b/risc0/Makefile @@ -21,12 +21,24 @@ RISC0_TEST_DEV_ONLY ?= 0 LUA_BIN ?= lua5.4 LUA ?= $(LUA_BIN) -STEP_LOG_UTIL := $(abspath step-log-util.lua) +STEP_LOG_UTIL := $(abspath ../src/step-log-util.lua) CARTESI_RISC0_CLI := $(abspath rust/target/debug/cartesi-risc0-cli) -CARTESI_MACHINE ?= cartesi-machine.lua +EMULATOR_DIR := $(abspath ..) +# Restricts which machine tests are recorded (Lua pattern, full match); default = all. +MACHINE_TEST ?= + +TARGET_OS ?= $(shell uname -s) +ifeq ($(TARGET_OS),Darwin) +NUM_JOBS ?= $(shell sysctl -n hw.ncpu) +else +NUM_JOBS ?= $(shell nproc) +endif +# MACHINE_FIXTURES_DIR and ONE_MCYCLE_DIR are absolute so they survive the cd into $(EMULATOR_DIR) in the fixtures recipe (FIXTURES_DIR itself stays relative).
FIXTURES_DIR := test/fixtures -STEP_LOG := $(FIXTURES_DIR)/step.log +MACHINE_FIXTURES_DIR := $(abspath $(FIXTURES_DIR)/cartesi-machine-tests) +ONE_MCYCLE_DIR := $(abspath $(FIXTURES_DIR)/one-mcycle) +STEP_LOG := $(ONE_MCYCLE_DIR)/one-mcycle.log RECEIPT := $(FIXTURES_DIR)/receipt.bin SEAL := $(FIXTURES_DIR)/seal.bin JOURNAL := $(FIXTURES_DIR)/journal.bin @@ -44,39 +56,59 @@ ifneq ($(RISC0_TEST_DEV_ONLY),1) test: test-pipeline test-solidity endif +check-format: + @$(MAKE) -C rust check-format + +format: + @$(MAKE) -C rust format + test-dev-mode: @$(MAKE) -C rust test test-pipeline: $(SEAL) @echo "--- Verifying receipt ---" @HASH_BEFORE=$$($(LUA) $(STEP_LOG_UTIL) root-hash-before $(STEP_LOG)) && \ - MCYCLE_COUNT=$$($(LUA) $(STEP_LOG_UTIL) mcycle-count $(STEP_LOG)) && \ + CYCLE_COUNT=$$($(LUA) $(STEP_LOG_UTIL) requested-cycle-count $(STEP_LOG)) && \ HASH_AFTER=$$($(LUA) $(STEP_LOG_UTIL) root-hash-after $(STEP_LOG)) && \ $(CARTESI_RISC0_CLI) verify \ - $(RECEIPT) "$$HASH_BEFORE" "$$MCYCLE_COUNT" "$$HASH_AFTER" && \ + $(RECEIPT) "$$HASH_BEFORE" "$$CYCLE_COUNT" "$$HASH_AFTER" && \ echo "--- Verifying seal ---" && \ $(CARTESI_RISC0_CLI) verify-seal \ - $(SEAL) $(JOURNAL) "$$HASH_BEFORE" "$$MCYCLE_COUNT" "$$HASH_AFTER" && \ + $(SEAL) $(JOURNAL) "$$HASH_BEFORE" "$$CYCLE_COUNT" "$$HASH_AFTER" && \ echo "--- Pipeline test passed ---" test-solidity: @$(MAKE) -C solidity test +# Generate risc0's fixtures via the emulator's shared recorders (needs a built emulator): +# cartesi-machine-tests/ machine step logs for dev-mode replay coverage +# one-mcycle/ one 1-mcycle log for the prove->compress->Sepolia pipeline +# reject-machine/ structurally-invalid logs the guest must reject (tampers the above) +fixtures: + @mkdir -p $(MACHINE_FIXTURES_DIR) $(ONE_MCYCLE_DIR) + @cd $(EMULATOR_DIR) && eval $$($(MAKE) env) && \ + $(LUA) tests/lua/cartesi-machine-tests.lua --jobs=$(NUM_JOBS) --hash-function=sha256 \ + $(if $(MACHINE_TEST),--test='$(MACHINE_TEST)',) \ + 
--test-path=tests/build/machine \ + --save-step-logs=$(MACHINE_FIXTURES_DIR) run_step && \ + $(LUA) tests/lua/record-one-mcycle.lua --hash-function=sha256 \ + --output-dir=$(ONE_MCYCLE_DIR) && \ + $(LUA) tests/lua/record-adversarial-machine.lua \ + --fixtures-dir=$(abspath $(FIXTURES_DIR)) \ + --output-dir=$(abspath $(FIXTURES_DIR))/reject-machine + $(STEP_LOG): - @mkdir -p $(FIXTURES_DIR) - @echo "--- Generating step log ---" - $(CARTESI_MACHINE) \ - --hash-tree=hash_function:sha256 \ - --max-mcycle=0 \ - --log-step=1,$@ + @echo "$(STEP_LOG) not found; run 'make fixtures' (needs a built emulator)" >&2 + @exit 1 $(RECEIPT): $(STEP_LOG) + @mkdir -p $(FIXTURES_DIR) @echo "--- Proving step log ---" @HASH_BEFORE=$$($(LUA) $(STEP_LOG_UTIL) root-hash-before $(STEP_LOG)) && \ - MCYCLE_COUNT=$$($(LUA) $(STEP_LOG_UTIL) mcycle-count $(STEP_LOG)) && \ + CYCLE_COUNT=$$($(LUA) $(STEP_LOG_UTIL) requested-cycle-count $(STEP_LOG)) && \ HASH_AFTER=$$($(LUA) $(STEP_LOG_UTIL) root-hash-after $(STEP_LOG)) && \ $(CARTESI_RISC0_CLI) prove \ - "$$HASH_BEFORE" $(STEP_LOG) "$$MCYCLE_COUNT" "$$HASH_AFTER" \ + "$$HASH_BEFORE" $(STEP_LOG) "$$CYCLE_COUNT" "$$HASH_AFTER" \ $(RECEIPT) $(SEAL): $(RECEIPT) @@ -101,4 +133,4 @@ clean: -@$(MAKE) -C solidity clean rm -rf $(FIXTURES_DIR) -.PHONY: all cpp rust clean test test-dev-mode test-pipeline test-solidity export-artifacts image-id +.PHONY: all cpp rust clean check-format format test test-dev-mode test-pipeline test-solidity export-artifacts image-id fixtures diff --git a/risc0/cpp/Makefile b/risc0/cpp/Makefile index 377a37aee..253acb7c6 100644 --- a/risc0/cpp/Makefile +++ b/risc0/cpp/Makefile @@ -54,7 +54,7 @@ endif # Flags to minimize undefined behavior UBFLAGS := -fno-strict-aliasing -fno-strict-overflow -fno-delete-null-pointer-checks -CFLAGS := -march=rv32im -mabi=ilp32 -Wl,--gc-sections $(OPTFLAGS) $(UBFLAGS) \ +CFLAGS := -march=rv32im -mabi=ilp32 -Wl,--gc-sections $(OPTFLAGS) $(UBFLAGS) -MMD -MP \ -DZKARCHITECTURE=1 \ $(DUMP_DEFS) \ 
-DAVOID_NATIVE_UINT128_T=1 \ @@ -120,5 +120,8 @@ $(EMULATOR_SRC_DIR)/interpret-jump-table.hpp: $(EMULATOR_TOOLS_DIR)/gen-interpre risc0-replay-steps.o: $(EMULATOR_SRC_DIR)/cm-version.h $(EMULATOR_SRC_DIR)/interpret-jump-table.hpp $(EMULATOR_OBJS) $(RISC0ARCH_OBJS) $(RISC0_THIRDPARTY_OBJS) $(LD) -relocatable $(EMULATOR_OBJS) $(RISC0ARCH_OBJS) $(RISC0_THIRDPARTY_OBJS) -o $@ +# Rebuild objects when an included header (e.g. shared emulator sources) changes. +-include $(wildcard *.d) + clean: - @rm -f *.o + @rm -f *.o *.d diff --git a/risc0/cpp/risc0-replay-steps.cpp b/risc0/cpp/risc0-replay-steps.cpp index 0597a9c2e..61ecdb111 100644 --- a/risc0/cpp/risc0-replay-steps.cpp +++ b/risc0/cpp/risc0-replay-steps.cpp @@ -35,10 +35,14 @@ extern "C" void risc0_replay_steps( replay_step_state_access::context context{}; replay_step_state_access a(context, step_log_image, step_log_image_size); uint64_t mcycle_end{}; - (void) __builtin_add_overflow(a.read_mcycle(), context.logged_mcycle_count, &mcycle_end); + // Saturate on overflow, matching machine::verify_step's saturating_add, so the RISC0 + // guest and the host replayer agree on the cycle target. 
+ if (__builtin_add_overflow(a.read_mcycle(), context.log.requested_cycle_count, &mcycle_end)) { + mcycle_end = UINT64_MAX; + } interpret(a, mcycle_end); a.finish(); - std::memcpy(out_root_hash_before, context.logged_root_hash_before.data(), 32); - *out_mcycle_count = context.logged_mcycle_count; - std::memcpy(out_root_hash_after, context.logged_root_hash_after.data(), 32); + std::memcpy(out_root_hash_before, context.log.root_hash_before.data(), 32); + *out_mcycle_count = context.log.requested_cycle_count; + std::memcpy(out_root_hash_after, context.log.root_hash_after.data(), 32); } diff --git a/risc0/rust/Makefile b/risc0/rust/Makefile index 86644f3e5..cd9bf9574 100644 --- a/risc0/rust/Makefile +++ b/risc0/rust/Makefile @@ -15,7 +15,8 @@ # RISC0_DEV_MODE ?= 1 -CARTESI_STEP_LOGS_PATH ?= $(abspath ../../tests/build/step-logs) +# Machine step logs the dev-mode test replays (generate with `make -C .. fixtures`). +CARTESI_STEP_LOGS_PATH ?= $(abspath ../test/fixtures/cartesi-machine-tests) ARTIFACTS_DIR ?= $(abspath ../artifacts) RISC0_FEATURES ?= @@ -32,6 +33,16 @@ all: test: cargo test $(CARGO_FEATURES) +# The guest is a separate workspace (its own Cargo.lock), so cargo fmt at the +# root does not reach it; format and check it explicitly. 
+format: + cargo fmt + cargo fmt --manifest-path methods/guest/Cargo.toml + +check-format: + cargo fmt --check + cargo fmt --check --manifest-path methods/guest/Cargo.toml + export-artifacts: all cargo run --bin cartesi-risc0-cli -- export-artifacts $(ARTIFACTS_DIR) @@ -42,4 +53,4 @@ clean: @if command -v cargo >/dev/null 2>&1; then cargo clean; fi @rm -rf $(ARTIFACTS_DIR) -.PHONY: all test clean export-artifacts image-id +.PHONY: all test format check-format clean export-artifacts image-id diff --git a/risc0/rust/cartesi-risc0/src/lib.rs b/risc0/rust/cartesi-risc0/src/lib.rs index 2b395e03d..47c492541 100644 --- a/risc0/rust/cartesi-risc0/src/lib.rs +++ b/risc0/rust/cartesi-risc0/src/lib.rs @@ -18,19 +18,14 @@ use std::fs; pub type MachineHash = [u8; 32]; use risc0_zkvm::{ - default_prover, ExecutorEnv, Groth16Receipt, Groth16ReceiptVerifierParameters, - InnerReceipt, MaybePruned, ProverOpts, Receipt, ReceiptClaim, + default_prover, sha::{Digest, Digestible}, + ExecutorEnv, Groth16Receipt, Groth16ReceiptVerifierParameters, InnerReceipt, MaybePruned, + ProverOpts, Receipt, ReceiptClaim, }; pub use methods::{REPLAY_STEP_ELF, REPLAY_STEP_ID}; -/// Step log header layout: -/// - root_hash_before: 32 bytes -/// - mcycle_count: 8 bytes (u64 little-endian) -/// - root_hash_after: 32 bytes -pub const STEP_LOG_HEADER_SIZE: usize = 32 + 8 + 32; - /// Journal layout (ABI-encoded, 96 bytes): /// - root_hash_before: bytes32 (32 bytes) /// - mcycle_count: uint64 padded to 32 bytes (24 zero bytes + 8 bytes big-endian) @@ -41,7 +36,12 @@ pub const JOURNAL_SIZE: usize = 96; /// Decode the ABI-encoded journal bytes (96 bytes) into its components. 
fn decode_journal(bytes: &[u8]) -> (MachineHash, u64, MachineHash) { - assert!(bytes.len() == JOURNAL_SIZE, "Journal must be {} bytes (abi.encode format), got {}", JOURNAL_SIZE, bytes.len()); + assert!( + bytes.len() == JOURNAL_SIZE, + "Journal must be {} bytes (abi.encode format), got {}", + JOURNAL_SIZE, + bytes.len() + ); let mut root_hash_before = [0u8; 32]; root_hash_before.copy_from_slice(&bytes[0..32]); let mcycle_count = u64::from_be_bytes(bytes[56..64].try_into().unwrap()); @@ -64,21 +64,51 @@ pub fn prove( mcycle_count: u64, root_hash_after: &MachineHash, ) -> Receipt { - let log_data = fs::read(log_file_path).expect("Could not read log file"); + try_prove( + guest_elf, + root_hash_before, + log_file_path, + mcycle_count, + root_hash_after, + ) + .unwrap_or_else(|e| panic!("{e}")) +} + +/// Like `prove`, but returns the failure as an `Err` instead of panicking. A structurally +/// invalid log makes the guest abort via `zk_abort_with_msg`, surfaced here with the same +/// message the C++ host throws; a caller belief that disagrees with the journal is reported +/// too. Used by the reject-fixture test to assert the guest rejects forged logs. +pub fn try_prove( + guest_elf: &[u8], + root_hash_before: &MachineHash, + log_file_path: &str, + mcycle_count: u64, + root_hash_after: &MachineHash, +) -> Result { + let log_data = fs::read(log_file_path).map_err(|e| format!("could not read log file: {e}"))?; let env = ExecutorEnv::builder() .write_slice(&log_data) .build() - .unwrap(); + .map_err(|e| format!("could not build executor env: {e}"))?; let prover = default_prover(); - let receipt = prover.prove_with_opts(env, guest_elf, &ProverOpts::default()).unwrap().receipt; + let receipt = prover + .prove_with_opts(env, guest_elf, &ProverOpts::default()) + .map_err(|e| format!("{e:?}"))? 
+ .receipt; let (j_hash_before, j_mcycle, j_hash_after) = decode_journal(&receipt.journal.bytes); - assert!(j_hash_before == *root_hash_before, "root_hash_before mismatch: argument does not match journal"); - assert!(j_mcycle == mcycle_count, "mcycle_count mismatch: argument does not match journal"); - assert!(j_hash_after == *root_hash_after, "root_hash_after mismatch: argument does not match journal"); + if j_hash_before != *root_hash_before { + return Err("root_hash_before mismatch: argument does not match journal".to_string()); + } + if j_mcycle != mcycle_count { + return Err("mcycle_count mismatch: argument does not match journal".to_string()); + } + if j_hash_after != *root_hash_after { + return Err("root_hash_after mismatch: argument does not match journal".to_string()); + } - receipt + Ok(receipt) } /// Compress a receipt to Groth16 and encode it for Solidity contract consumption. @@ -113,11 +143,21 @@ pub fn verify_seal( mcycle_count: u64, root_hash_after: &MachineHash, ) -> (MachineHash, u64, MachineHash) { - assert!(seal.len() == 260, "seal must be 260 bytes (4-byte selector + 256-byte proof), got {}", seal.len()); + assert!( + seal.len() == 260, + "seal must be 260 bytes (4-byte selector + 256-byte proof), got {}", + seal.len() + ); + // The seal prefix selects the on-chain verifier; a seal carrying a different selector would route + // elsewhere (or fail) in the Verifier Router, so reject it here instead of silently dropping it. 
+ let verifier_parameters = Groth16ReceiptVerifierParameters::default().digest(); + assert!( + seal[..4] == verifier_parameters.as_bytes()[..4], + "seal selector does not match the expected Groth16 verifier parameters" + ); let raw_seal = &seal[4..]; let image_id_digest: Digest = (*image_id).into(); let claim = ReceiptClaim::ok(image_id_digest, journal_bytes.to_vec()); - let verifier_parameters = Groth16ReceiptVerifierParameters::default().digest(); let groth16_receipt = Groth16Receipt::new( raw_seal.to_vec(), MaybePruned::Value(claim), @@ -129,9 +169,18 @@ pub fn verify_seal( ); receipt.verify(*image_id).unwrap(); let (j_hash_before, j_mcycle, j_hash_after) = decode_journal(journal_bytes); - assert!(j_hash_before == *root_hash_before, "root_hash_before mismatch: argument does not match journal"); - assert!(j_mcycle == mcycle_count, "mcycle_count mismatch: argument does not match journal"); - assert!(j_hash_after == *root_hash_after, "root_hash_after mismatch: argument does not match journal"); + assert!( + j_hash_before == *root_hash_before, + "root_hash_before mismatch: argument does not match journal" + ); + assert!( + j_mcycle == mcycle_count, + "mcycle_count mismatch: argument does not match journal" + ); + assert!( + j_hash_after == *root_hash_after, + "root_hash_after mismatch: argument does not match journal" + ); (j_hash_before, j_mcycle, j_hash_after) } @@ -144,8 +193,36 @@ pub fn verify( ) -> (MachineHash, u64, MachineHash) { receipt.verify(*image_id).unwrap(); let (j_hash_before, j_mcycle, j_hash_after) = decode_journal(&receipt.journal.bytes); - assert!(j_hash_before == *root_hash_before, "root_hash_before mismatch: argument does not match journal"); - assert!(j_mcycle == mcycle_count, "mcycle_count mismatch: argument does not match journal"); - assert!(j_hash_after == *root_hash_after, "root_hash_after mismatch: argument does not match journal"); + assert!( + j_hash_before == *root_hash_before, + "root_hash_before mismatch: argument does not match 
journal" + ); + assert!( + j_mcycle == mcycle_count, + "mcycle_count mismatch: argument does not match journal" + ); + assert!( + j_hash_after == *root_hash_after, + "root_hash_after mismatch: argument does not match journal" + ); (j_hash_before, j_mcycle, j_hash_after) } + +#[cfg(test)] +mod tests { + use super::*; + + // A seal whose 4-byte selector does not match the Groth16 verifier parameters must be rejected up + // front, not silently stripped: on-chain the prefix routes the Verifier Router. The check runs + // before any receipt reconstruction, so a corrupted selector fails here without a valid proof. + #[test] + #[should_panic(expected = "seal selector does not match")] + fn verify_seal_rejects_wrong_selector() { + let selector = Groth16ReceiptVerifierParameters::default().digest(); + let mut seal = vec![0u8; 260]; + seal[..4].copy_from_slice(&selector.as_bytes()[..4]); + seal[0] ^= 0xff; // corrupt the selector so it no longer matches + let zero: MachineHash = [0u8; 32]; + verify_seal(&[0u32; 8], &seal, &[0u8; 96], &zero, 0, &zero); + } +} diff --git a/risc0/rust/cartesi-risc0/src/main.rs b/risc0/rust/cartesi-risc0/src/main.rs index c13842309..a7efbcace 100644 --- a/risc0/rust/cartesi-risc0/src/main.rs +++ b/risc0/rust/cartesi-risc0/src/main.rs @@ -36,10 +36,12 @@ Pipeline: */ -use std::{fs, env, error, path::Path}; -use risc0_zkvm::Receipt; -use cartesi_risc0::{prove, compress, verify, verify_seal, guest_image_id, REPLAY_STEP_ELF, REPLAY_STEP_ID}; use cartesi_risc0::MachineHash; +use cartesi_risc0::{ + compress, guest_image_id, prove, verify, verify_seal, REPLAY_STEP_ELF, REPLAY_STEP_ID, +}; +use risc0_zkvm::Receipt; +use std::{env, error, fs, path::Path}; fn parse_hash(hex: &str) -> MachineHash { let bytes = hex::decode(hex).expect("Invalid hex string"); @@ -59,7 +61,11 @@ fn image_id_to_hex(id: &[u32; 8]) -> String { .collect() } -fn export_artifacts(guest_elf: &[u8], image_id: &[u32; 8], output_dir: &str) -> Result<(), Box> { +fn export_artifacts( + 
guest_elf: &[u8], + image_id: &[u32; 8], + output_dir: &str, +) -> Result<(), Box> { let output_path = Path::new(output_dir); fs::create_dir_all(output_path)?; @@ -77,29 +83,62 @@ fn export_artifacts(guest_elf: &[u8], image_id: &[u32; 8], output_dir: &str) -> Ok(()) } -fn prove_and_save_receipt(guest_elf: &[u8], root_hash_before: MachineHash, log_file_path: &str, mcycle_count: u64, root_hash_after: MachineHash, receipt_path: &str) -> Result<(), Box> { +fn prove_and_save_receipt( + guest_elf: &[u8], + root_hash_before: MachineHash, + log_file_path: &str, + mcycle_count: u64, + root_hash_after: MachineHash, + receipt_path: &str, +) -> Result<(), Box> { println!("Proving step log: {}", log_file_path); - let receipt = prove(guest_elf, &root_hash_before, log_file_path, mcycle_count, &root_hash_after); + let receipt = prove( + guest_elf, + &root_hash_before, + log_file_path, + mcycle_count, + &root_hash_after, + ); fs::write(receipt_path, bincode::serialize(&receipt)?)?; println!("Receipt saved to: {}", receipt_path); Ok(()) } -fn compress_and_save(receipt_path: &str, seal_path: &str, journal_path: &str) -> Result<(), Box> { +fn compress_and_save( + receipt_path: &str, + seal_path: &str, + journal_path: &str, +) -> Result<(), Box> { println!("Compressing receipt to Groth16: {}", receipt_path); let receipt: Receipt = bincode::deserialize(&fs::read(receipt_path)?)?; let (seal, journal) = compress(&receipt); fs::write(seal_path, &seal)?; println!("Seal saved to: {} ({} bytes)", seal_path, seal.len()); fs::write(journal_path, &journal)?; - println!("Journal saved to: {} ({} bytes)", journal_path, journal.len()); + println!( + "Journal saved to: {} ({} bytes)", + journal_path, + journal.len() + ); Ok(()) } -fn verify_receipt(image_id: &[u32; 8], receipt_path: &str, root_hash_before: MachineHash, mcycle_count: u64, root_hash_after: MachineHash) -> Result<(), Box> { +fn verify_receipt( + image_id: &[u32; 8], + receipt_path: &str, + root_hash_before: MachineHash, + mcycle_count: 
u64, + root_hash_after: MachineHash, +) -> Result<(), Box> { println!("Verifying receipt: {}", receipt_path); let receipt: Receipt = bincode::deserialize(&fs::read(receipt_path)?)?; - let (j_hash_before, j_mcycle, j_hash_after) = verify(image_id, &receipt, &root_hash_before, mcycle_count, &root_hash_after); + let (j_hash_before, j_mcycle, j_hash_after) = verify( + image_id, + &receipt, + &root_hash_before, + mcycle_count, + &root_hash_after, + ); println!("Verification successful"); println!("Journal contents:"); println!(" root_hash_before: {}", hash_to_hex(&j_hash_before)); @@ -108,11 +147,28 @@ fn verify_receipt(image_id: &[u32; 8], receipt_path: &str, root_hash_before: Mac Ok(()) } -fn verify_seal_and_journal(image_id: &[u32; 8], seal_path: &str, journal_path: &str, root_hash_before: MachineHash, mcycle_count: u64, root_hash_after: MachineHash) -> Result<(), Box> { - println!("Verifying seal and journal: seal={}, journal={}", seal_path, journal_path); +fn verify_seal_and_journal( + image_id: &[u32; 8], + seal_path: &str, + journal_path: &str, + root_hash_before: MachineHash, + mcycle_count: u64, + root_hash_after: MachineHash, +) -> Result<(), Box> { + println!( + "Verifying seal and journal: seal={}, journal={}", + seal_path, journal_path + ); let seal = fs::read(seal_path)?; let journal_bytes = fs::read(journal_path)?; - let (j_hash_before, j_mcycle, j_hash_after) = verify_seal(image_id, &seal, &journal_bytes, &root_hash_before, mcycle_count, &root_hash_after); + let (j_hash_before, j_mcycle, j_hash_after) = verify_seal( + image_id, + &seal, + &journal_bytes, + &root_hash_before, + mcycle_count, + &root_hash_after, + ); println!("Verification successful"); println!("Journal contents:"); println!(" root_hash_before: {}", hash_to_hex(&j_hash_before)); @@ -123,12 +179,12 @@ fn verify_seal_and_journal(image_id: &[u32; 8], seal_path: &str, journal_path: & fn usage() { eprintln!("Usage: cartesi-risc0-cli [options] "); - eprintln!(""); + eprintln!(); 
eprintln!("Options:"); eprintln!(" --guest-elf Use a precompiled guest binary (R0BF format) instead of"); eprintln!(" the embedded one. Enables canonical Image ID on machines"); eprintln!(" built without Docker."); - eprintln!(""); + eprintln!(); eprintln!("Commands:"); eprintln!(" prove "); eprintln!(" compress "); @@ -164,7 +220,11 @@ fn main() { std::process::exit(1); }); let id = guest_image_id(&elf); - eprintln!("Using guest ELF: {} (Image ID: {})", path, image_id_to_hex(&id)); + eprintln!( + "Using guest ELF: {} (Image ID: {})", + path, + image_id_to_hex(&id) + ); (elf, id) } None => (REPLAY_STEP_ELF.to_vec(), REPLAY_STEP_ID), @@ -190,11 +250,22 @@ fn main() { let mcycle_count: u64 = args[4].parse().expect("Invalid mcycle count"); let root_hash_after = parse_hash(&args[5]); let receipt_path = &args[6]; - prove_and_save_receipt(&guest_elf, root_hash_before, log_file_path, mcycle_count, root_hash_after, receipt_path).expect("Proof generation failed"); + prove_and_save_receipt( + &guest_elf, + root_hash_before, + log_file_path, + mcycle_count, + root_hash_after, + receipt_path, + ) + .expect("Proof generation failed"); } "compress" => { if args.len() != 5 { - eprintln!("Usage: {} compress ", args[0]); + eprintln!( + "Usage: {} compress ", + args[0] + ); std::process::exit(1); } let receipt_path = &args[2]; @@ -211,7 +282,14 @@ fn main() { let root_hash_before = parse_hash(&args[3]); let mcycle_count: u64 = args[4].parse().expect("Invalid mcycle count"); let root_hash_after = parse_hash(&args[5]); - verify_receipt(&image_id, receipt_path, root_hash_before, mcycle_count, root_hash_after).expect("Verification failed"); + verify_receipt( + &image_id, + receipt_path, + root_hash_before, + mcycle_count, + root_hash_after, + ) + .expect("Verification failed"); } "verify-seal" => { if args.len() != 7 { @@ -223,7 +301,15 @@ fn main() { let root_hash_before = parse_hash(&args[4]); let mcycle_count: u64 = args[5].parse().expect("Invalid mcycle count"); let root_hash_after 
= parse_hash(&args[6]); - verify_seal_and_journal(&image_id, seal_path, journal_path, root_hash_before, mcycle_count, root_hash_after).expect("Seal verification failed"); + verify_seal_and_journal( + &image_id, + seal_path, + journal_path, + root_hash_before, + mcycle_count, + root_hash_after, + ) + .expect("Seal verification failed"); } "export-artifacts" => { if args.len() != 3 { diff --git a/risc0/rust/cartesi-risc0/tests/test_prove_and_verify.rs b/risc0/rust/cartesi-risc0/tests/test_prove_and_verify.rs index 5d68639f8..b1b92e7e2 100644 --- a/risc0/rust/cartesi-risc0/tests/test_prove_and_verify.rs +++ b/risc0/rust/cartesi-risc0/tests/test_prove_and_verify.rs @@ -1,65 +1,97 @@ -use cartesi_risc0::{prove, verify, REPLAY_STEP_ELF, REPLAY_STEP_ID}; use cartesi_risc0::MachineHash; +use cartesi_risc0::{prove, verify, REPLAY_STEP_ELF, REPLAY_STEP_ID}; use std::fs; use std::path::Path; -use std::io::Read; - -fn read_step_log_header(path: &str) -> Result<(MachineHash, u64, MachineHash), String> { - let mut file = fs::File::open(path) - .map_err(|e| format!("Failed to open step log: {}", e))?; - - let mut header = [0u8; 72]; - file.read_exact(&mut header) - .map_err(|e| format!("Failed to read step log header: {}", e))?; - - let mut root_hash_before = [0u8; 32]; - root_hash_before.copy_from_slice(&header[0..32]); - let mcycle_count = u64::from_le_bytes([ - header[32], header[33], header[34], header[35], - header[36], header[37], header[38], header[39], - ]); +// Expected hashes come from the _manifest.csv written by the recorder from the +// LIVE machine - an independent source of truth, not re-read from the log header. +// This is what makes the test exercise the verifier's Layer 2 (caller belief vs +// log) and not just Layer 1 (log internal consistency): a log whose header +// disagrees with the recorded truth is caught here. 
+struct ManifestRow { + kind: String, + name: String, + cycle_count: u64, + root_before: MachineHash, + root_after: MachineHash, +} - let mut root_hash_after = [0u8; 32]; - root_hash_after.copy_from_slice(&header[40..72]); +fn parse_hash(s: &str) -> MachineHash { + let hex = s.strip_prefix("0x").unwrap_or(s); + assert_eq!(hex.len(), 64, "expected 32-byte hex hash, got {:?}", s); + let mut out = [0u8; 32]; + for (i, byte) in out.iter_mut().enumerate() { + *byte = u8::from_str_radix(&hex[2 * i..2 * i + 2], 16) + .unwrap_or_else(|_| panic!("invalid hex in hash: {:?}", s)); + } + out +} - Ok((root_hash_before, mcycle_count, root_hash_after)) +fn read_manifest(dir: &Path) -> Vec<ManifestRow> { + let path = dir.join("_manifest.csv"); + let text = fs::read_to_string(&path) + .unwrap_or_else(|e| panic!("failed to read manifest {}: {}", path.display(), e)); + let mut rows = Vec::new(); + for line in text.lines().skip(1) { + if line.is_empty() { + continue; + } + // Schema: kind,name,expectError,hashFunction,requestedCycleCount,rootHashBefore,rootHashAfter, + // reason,dataLength,data,revertRootHash + let cols: Vec<&str> = line.split(',').collect(); + assert!(cols.len() >= 7, "malformed manifest row: {:?}", line); + rows.push(ManifestRow { + kind: cols[0].to_string(), + name: cols[1].to_string(), + cycle_count: cols[4] + .parse() + .unwrap_or_else(|_| panic!("bad cycle count: {:?}", cols[4])), + root_before: parse_hash(cols[5]), + root_after: parse_hash(cols[6]), + }); + } + rows } #[test] fn test_prove_and_verify() { - let fixtures_dir = Path::new(env!("CARTESI_STEP_LOGS_PATH")); - - assert!(fixtures_dir.exists(), "Fixtures directory does not exist: {}", fixtures_dir.display()); - - let dir_entries = fs::read_dir(&fixtures_dir) - .expect("Failed to read directory") - .collect::<Vec<_>>(); - - if dir_entries.is_empty() { - panic!("No step log files found in directory: {}", fixtures_dir.display()); - } + let dir = Path::new(env!("CARTESI_STEP_LOGS_PATH")); + assert!( + dir.exists(), + "Fixtures 
directory does not exist: {}", + dir.display() + ); - for entry in dir_entries { - let entry = entry.expect("Failed to read directory entry"); - let path = entry.path(); - let file_name = path.file_name().unwrap().to_str().unwrap(); + let rows = read_manifest(dir); + assert!(!rows.is_empty(), "manifest has no rows: {}", dir.display()); - // Skip files that don't match the step log pattern - if !file_name.starts_with("step-") || !file_name.ends_with(".log") { + let mut replayed = 0; + for row in &rows { + // RISC0 replays the machine-level architecture step logs only. + if row.kind != "machine" { continue; } - - let (root_hash_before, mcycle_count, root_hash_after) = - read_step_log_header(path.to_str().unwrap()) - .expect(&format!("Failed to read step log header from {}", file_name)); - - eprintln!( - "Verifying step file: {}\nStart hash: {:02x?}, Cycle count: {}, End hash: {:02x?}", - file_name, root_hash_before, mcycle_count, root_hash_after + let path = dir.join(&row.name); + eprintln!("Verifying {} (cycles={})", row.name, row.cycle_count); + let receipt = prove( + REPLAY_STEP_ELF, + &row.root_before, + path.to_str().unwrap(), + row.cycle_count, + &row.root_after, ); - - let receipt = prove(REPLAY_STEP_ELF, &root_hash_before, path.to_str().unwrap(), mcycle_count, &root_hash_after); - verify(&REPLAY_STEP_ID, &receipt, &root_hash_before, mcycle_count, &root_hash_after); + verify( + &REPLAY_STEP_ID, + &receipt, + &row.root_before, + row.cycle_count, + &row.root_after, + ); + replayed += 1; } + assert!( + replayed > 0, + "no machine step-log rows in manifest: {}", + dir.display() + ); } diff --git a/risc0/rust/cartesi-risc0/tests/test_reject_fixtures.rs b/risc0/rust/cartesi-risc0/tests/test_reject_fixtures.rs new file mode 100644 index 000000000..7fca257ed --- /dev/null +++ b/risc0/rust/cartesi-risc0/tests/test_reject_fixtures.rs @@ -0,0 +1,136 @@ +use cartesi_risc0::MachineHash; +use cartesi_risc0::{try_prove, REPLAY_STEP_ELF}; +use std::fs; +use std::path::Path; + 
+// The big-machine (sha256) reject fixtures (tests/lua/record-adversarial-machine.lua) are +// structurally invalid logs. The guest must abort on each -- via zk_abort_with_msg, which +// carries the same message the C++ host throws -- rather than produce a valid receipt. This +// is the soundness statement: a malicious prover cannot get a proof for a forged log. + +fn parse_hash(s: &str) -> MachineHash { + let hex = s.strip_prefix("0x").unwrap_or(s); + assert_eq!(hex.len(), 64, "expected 32-byte hex hash, got {:?}", s); + let mut out = [0u8; 32]; + for (i, byte) in out.iter_mut().enumerate() { + *byte = u8::from_str_radix(&hex[2 * i..2 * i + 2], 16) + .unwrap_or_else(|_| panic!("invalid hex in hash: {:?}", s)); + } + out +} + +/// The substring the guest abort message must contain for each reject tag (the C++ throw +/// message, surfaced through zk_abort_with_msg). +fn expected_message(tag: &str) -> &'static str { + match tag { + "bad_signature" => "invalid step log signature", + "unsupported_hash_function" => "unsupported hash function type", + "nonzero_scratch_hash" => "scratch hash area is not zero", + "initial_root_mismatch" => "initial root hash mismatch", + "page_count_zero" => "page count is zero", + "page_count_exceeds_size" => "page count exceeds step log size", + "sibling_count_mismatch" => "sibling count does not match step log size", + "page_index_not_increasing" => "page index is not in increasing order", + "too_few_siblings" => "too few sibling hashes in log", + other => panic!("unmapped reject tag: {other}"), + } +} + +#[test] +fn test_guest_rejects_forged_logs() { + // reject-machine/ sits next to the positive cartesi-machine-tests/ fixtures. 
+ let dir = Path::new(env!("CARTESI_STEP_LOGS_PATH")) + .parent() + .expect("fixtures parent") + .join("reject-machine"); + assert!( + dir.exists(), + "reject fixtures dir does not exist: {}", + dir.display() + ); + + let text = fs::read_to_string(dir.join("_manifest.csv")) + .unwrap_or_else(|e| panic!("failed to read reject manifest: {e}")); + + let mut checked = 0; + for line in text.lines().skip(1) { + if line.is_empty() { + continue; + } + // Schema: kind,name,expectError,hashFunction,cycle,before,after,reason,dataLength,data,revertRootHash + let cols: Vec<&str> = line.split(',').collect(); + assert!(cols.len() >= 7, "malformed reject row: {line:?}"); + if cols[0] != "machine" { + continue; + } + let name = cols[1]; + let tag = cols[2]; + let cycle: u64 = cols[4] + .parse() + .unwrap_or_else(|_| panic!("bad cycle: {:?}", cols[4])); + let before = parse_hash(cols[5]); + let after = parse_hash(cols[6]); + let path = dir.join(name); + + eprintln!("Rejecting {name} (expect: {tag})"); + let err = try_prove( + REPLAY_STEP_ELF, + &before, + path.to_str().unwrap(), + cycle, + &after, + ) + .expect_err(&format!("guest ACCEPTED forged log {name} (tag {tag})")); + let want = expected_message(tag); + assert!( + err.contains(want), + "rejected {name} but message {err:?} lacks {want:?}" + ); + checked += 1; + } + assert!(checked > 0, "no machine reject rows in {}", dir.display()); +} + +/// Layer-2: a valid log proven against a claim that disagrees with the journal must be +/// rejected host-side. Reuses a positive fixture with one perturbed argument. 
+#[test] +fn test_host_rejects_wrong_belief() { + let dir = Path::new(env!("CARTESI_STEP_LOGS_PATH")); + let text = fs::read_to_string(dir.join("_manifest.csv")) + .unwrap_or_else(|e| panic!("failed to read manifest: {e}")); + let (name, cycle, before, after) = text + .lines() + .skip(1) + .find_map(|line| { + let c: Vec<&str> = line.split(',').collect(); + if c.len() >= 7 && c[0] == "machine" { + Some(( + c[1].to_string(), + c[4].parse::<u64>().unwrap(), + parse_hash(c[5]), + parse_hash(c[6]), + )) + } else { + None + } + }) + .expect("a machine row in the positive manifest"); + let path = dir.join(&name); + let p = path.to_str().unwrap(); + + let mut bad = before; + bad[0] ^= 0xff; + let e = + try_prove(REPLAY_STEP_ELF, &bad, p, cycle, &after).expect_err("wrong root_before accepted"); + assert!(e.contains("root_hash_before mismatch"), "{e:?}"); + + let e = try_prove(REPLAY_STEP_ELF, &before, p, cycle + 1, &after) + .expect_err("wrong cycle accepted"); + assert!(e.contains("mcycle_count mismatch"), "{e:?}"); + + let mut bad = after; + bad[0] ^= 0xff; + let e = + try_prove(REPLAY_STEP_ELF, &before, p, cycle, &bad).expect_err("wrong root_after accepted"); + assert!(e.contains("root_hash_after mismatch"), "{e:?}"); +} diff --git a/risc0/rust/methods/build.rs b/risc0/rust/methods/build.rs index b34b497f5..257153f69 100644 --- a/risc0/rust/methods/build.rs +++ b/risc0/rust/methods/build.rs @@ -46,9 +46,7 @@ fn main() { .build() .unwrap(); - risc0_build::embed_methods_with_options(HashMap::from([ - ("replay_step", guest_opts), - ])); + risc0_build::embed_methods_with_options(HashMap::from([("replay_step", guest_opts)])); } else { risc0_build::embed_methods_with_options(HashMap::new()); } diff --git a/risc0/rust/methods/guest/build.rs b/risc0/rust/methods/guest/build.rs index b0e5fe557..6ecc71a74 100644 --- a/risc0/rust/methods/guest/build.rs +++ b/risc0/rust/methods/guest/build.rs @@ -19,7 +19,7 @@ fn main() { // Tell Cargo to rerun the build script if the object file 
changes println!("cargo:rerun-if-changed={}", RISC0_REPLAY_STEPS_OBJ_PATH); - + cc::Build::new() .object(RISC0_REPLAY_STEPS_OBJ_PATH) .compile("guest"); diff --git a/risc0/rust/methods/guest/src/main.rs b/risc0/rust/methods/guest/src/main.rs index ee4237144..fb76e4aec 100644 --- a/risc0/rust/methods/guest/src/main.rs +++ b/risc0/rust/methods/guest/src/main.rs @@ -19,8 +19,8 @@ use risc0_zkvm::guest::env; use std::ffi::CStr; use std::io::Read; risc0_zkvm::guest::entry!(main); -use std::os::raw::{c_char, c_ulong, c_ulonglong}; use risc0_zkvm::sha::{Impl, Sha256}; +use std::os::raw::{c_char, c_ulong, c_ulonglong}; type MachineHash = [u8; 32]; extern "C" { @@ -34,8 +34,6 @@ extern "C" { ); } - - #[no_mangle] pub extern "C" fn zk_abort_with_msg(msg: *const c_char) { let str = unsafe { CStr::from_ptr(msg).to_string_lossy().into_owned() }; @@ -47,35 +45,61 @@ pub extern "C" fn zk_putchar(c: u8) { print!("{}", c as char); } - #[no_mangle] -pub extern "C" fn zk_merkle_tree_hash(hash_tree_target: u64, data: *const c_char, size: c_ulong, hash: *mut c_char) { +pub extern "C" fn zk_merkle_tree_hash( + hash_tree_target: u64, + data: *const c_char, + size: c_ulong, + hash: *mut c_char, +) { if hash_tree_target != 1 { panic!("zk_merkle_tree_hash: hash_tree_target must be 1"); } if size > 32 { unsafe { let half_size = size / 2; - let left_hash = [0u8; 32]; - zk_merkle_tree_hash(hash_tree_target, data, half_size, left_hash.as_ptr() as *mut c_char); - let right_hash = [0u8; 32]; - zk_merkle_tree_hash(hash_tree_target, data.add(half_size as usize) as *const c_char, half_size, right_hash.as_ptr() as *mut c_char); + let mut left_hash = [0u8; 32]; + zk_merkle_tree_hash( + hash_tree_target, + data, + half_size, + left_hash.as_mut_ptr() as *mut c_char, + ); + let mut right_hash = [0u8; 32]; + zk_merkle_tree_hash( + hash_tree_target, + data.add(half_size as usize) as *const c_char, + half_size, + right_hash.as_mut_ptr() as *mut c_char, + ); let mut conctd = [0u8; 64]; 
std::ptr::copy(left_hash.as_ptr(), conctd.as_mut_ptr(), 32); std::ptr::copy(right_hash.as_ptr(), conctd.as_mut_ptr().add(32), 32); let result_bytes = Impl::hash_bytes(&conctd).as_bytes(); std::ptr::copy(result_bytes.as_ptr(), hash as *mut u8, 32); } - } else{ - let result_bytes = Impl::hash_bytes(unsafe { std::slice::from_raw_parts(data as *const u8, size as usize) }).as_bytes(); + } else { + let result_bytes = Impl::hash_bytes(unsafe { + std::slice::from_raw_parts(data as *const u8, size as usize) + }) + .as_bytes(); unsafe { std::ptr::copy(result_bytes.as_ptr(), hash as *mut u8, 32); - } + } } } +// The guest exports only the hash primitives (zk_merkle_tree_hash, zk_concat_hash). +// Composite hashes such as the padded Merkle hash are built on these in freestanding +// C++ (replay-step-state-access.hpp). + #[no_mangle] -pub extern "C" fn zk_concat_hash(hash_tree_target: u64, left: *const c_char, right: *const c_char, result: *mut c_char) { +pub extern "C" fn zk_concat_hash( + hash_tree_target: u64, + left: *const c_char, + right: *const c_char, + result: *mut c_char, +) { if hash_tree_target != 1 { panic!("zk_concat_hash: hash_tree_target must be 1"); } @@ -88,7 +112,6 @@ pub extern "C" fn zk_concat_hash(hash_tree_target: u64, left: *const c_char, rig unsafe { std::ptr::copy(result_bytes.as_ptr(), result as *mut u8, 32); } - } fn main() { diff --git a/risc0/step-log-util.lua b/risc0/step-log-util.lua deleted file mode 100755 index 69eaad9d1..000000000 --- a/risc0/step-log-util.lua +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env lua5.4 - --- Copyright Cartesi and individual authors (see AUTHORS) --- SPDX-License-Identifier: Apache-2.0 --- --- Licensed under the Apache License, Version 2.0 (the "License"); --- you may not use this file except in compliance with the License. 
--- You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. --- - -local HEADER_SIZE = 72 -- root_hash_before(32) + mcycle_count(8) + root_hash_after(32) - -local function hexhash(bin) - local hex = {} - for i = 1, #bin do hex[i] = string.format("%02x", string.byte(bin, i)) end - return table.concat(hex) -end - -local function read_header(path) - local f = assert(io.open(path, "rb")) - local data = f:read(HEADER_SIZE) - assert(data and #data >= HEADER_SIZE, - string.format("step log too small (got %d bytes, need %d)", data and #data or 0, HEADER_SIZE)) - local root_hash_before = hexhash(data:sub(1, 32)) - local mcycle_count = string.unpack("") - local hash_before, mcycle, hash_after = read_header(path) - print("Step log: " .. path) - print(" root_hash_before: " .. hash_before) - print(" mcycle_count: " .. mcycle) - print(" root_hash_after: " .. hash_after) -end - -commands["root-hash-before"] = function(args) - local path = assert(args[1], "usage: step-log-util.lua root-hash-before ") - local hash_before = read_header(path) - io.write(hash_before) -end - -commands["mcycle-count"] = function(args) - local path = assert(args[1], "usage: step-log-util.lua mcycle-count ") - local _, mcycle = read_header(path) - io.write(tostring(mcycle)) -end - -commands["root-hash-after"] = function(args) - local path = assert(args[1], "usage: step-log-util.lua root-hash-after ") - local _, _, hash_after = read_header(path) - io.write(hash_after) -end - --- Print help and exit -local function help() - io.stderr:write(string.format( - [=[ -Usage: - - %s [args...] 
- -Commands: - - info Print step log header fields - root-hash-before Print root hash before (hex) - mcycle-count Print mcycle count (decimal) - root-hash-after Print root hash after (hex) - -]=], - arg[0] - )) - os.exit() -end - --- For each option, --- first entry is the pattern to match --- second entry is a callback --- if callback returns true, the option is accepted. --- if callback returns false, the option is rejected. -local options = { - { - "^%-h$", - function(all) - if not all then return false end - help() - end, - }, - { - "^%-%-help$", - function(all) - if not all then return false end - help() - end, - }, - { - ".*", - function(all) - error("unrecognized option " .. all) - end, - }, -} - --- Process command line options -local values = {} -for _, argument in ipairs(arg) do - if argument:sub(1, 1) == "-" then - for _, option in ipairs(options) do - if option[2](argument:match(option[1])) then - break - end - end - else - values[#values + 1] = argument - end -end - -if not values[1] then help() end -local cmd_name = values[1] -assert(commands[cmd_name], "unknown command '" .. cmd_name .. "', use --help for usage") - -local cmd_args = {} -for i = 2, #values do - cmd_args[i - 1] = values[i] -end - -commands[cmd_name](cmd_args) diff --git a/solidity-step/.gitignore b/solidity-step/.gitignore new file mode 100644 index 000000000..44c6498a5 --- /dev/null +++ b/solidity-step/.gitignore @@ -0,0 +1,7 @@ +out/ +cache/ +broadcast/ +test/fixtures/ +# forge-std is a test-only dependency fetched on demand by `make dep` (see Makefile), +# not a submodule, so a C++-only checkout never pulls it. 
+lib/ diff --git a/solidity-step/Makefile b/solidity-step/Makefile new file mode 100644 index 000000000..cf6ec54b1 --- /dev/null +++ b/solidity-step/Makefile @@ -0,0 +1,145 @@ +# Copyright Cartesi and individual authors (see AUTHORS) +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Solidity replay library for Cartesi binary step logs. +# +# Targets: +# dep Install the test-only forge-std dependency into lib/. +# build Compile contracts via forge. +# test Run the Foundry test suite. +# coverage Line/branch coverage of src/ (incl. the transpiled files). +# fixtures Regenerate step-log fixtures from the uarch test suite. +# gen-constants Regenerate src/EmulatorConstants.sol from the built emulator. +# gen-transpiled Regenerate the transpiled .sol files from their C++ sources. +# gen-all gen-constants + gen-transpiled. +# check-gen-all CI drift check: regenerate everything, fail on any diff. +# test-transpile Golden-output unit tests for the transpiler. +# clean Remove forge build artifacts and fixtures. + +EMULATOR_DIR := $(abspath ..) +LUA_BIN ?= $(shell which lua5.4) +FIXTURES_DIR := test/fixtures + +# Restricts which uarch tests are recorded into fixtures. Default = the full +# rv64ui-uarch suite, so any instruction-handler or ECALL transpilation +# regression is caught. Override for quick iteration, e.g. 
+# make UARCH_TEST_PATTERN='rv64ui%-uarch%-add.-%.bin' fixtures +UARCH_TEST_PATTERN ?= rv64ui%-uarch%-.+%.bin + +.PHONY: dep build test coverage fmt fmt-check fixtures fixtures-clean clean \ + gen-constants check-gen-constants \ + gen-transpiled check-gen-transpiled \ + gen-all check-gen-all test-transpile + +# forge-std is a test-only dependency, installed on demand into the gitignored +# lib/ rather than committed as a git submodule. Keeping it out of the submodule +# set means a plain `make submodules` (or any C++-only checkout) never clones it. +dep: + @if [ ! -d lib/forge-std ]; then \ + forge install foundry-rs/forge-std@v1.16.1 --no-git; \ + else \ + echo "Dependencies already installed (lib/forge-std exists)"; \ + fi + +build: dep + forge build + +test: dep fmt-check + forge test -vv + +# --ir-minimum is required because the test profile compiles with via_ir. +coverage: dep + forge coverage --ir-minimum + +fmt: + forge fmt + +fmt-check: + forge fmt --check + +# gen-constants reads the live cartesi Lua module, so it needs the emulator's +# LUA_PATH_5_4 / LUA_CPATH_5_4 (a built cartesi.so); gen-transpiled needs only lpeg. +gen-constants: + @eval $$($(MAKE) -sC $(EMULATOR_DIR) --no-print-directory env) && \ + $(LUA_BIN) tools/gen-emulator-constants.lua > src/EmulatorConstants.sol + @forge fmt src/EmulatorConstants.sol + @echo "Regenerated src/EmulatorConstants.sol" + +check-gen-constants: + @eval $$($(MAKE) -sC $(EMULATOR_DIR) --no-print-directory env) && \ + $(LUA_BIN) tools/gen-emulator-constants.lua > /tmp/EmulatorConstants.gen.sol + @forge fmt /tmp/EmulatorConstants.gen.sol + @diff -u src/EmulatorConstants.sol /tmp/EmulatorConstants.gen.sol \ + || { echo "ERROR: src/EmulatorConstants.sol is stale. Run 'make gen-constants' and commit."; exit 1; } + @echo "src/EmulatorConstants.sol is in sync with the generator." 
+ +gen-transpiled: + $(LUA_BIN) tools/transpile-uarch.lua $(EMULATOR_DIR)/src/uarch-step.cpp src/UArchStep.sol UArchStep uarchStep + $(LUA_BIN) tools/transpile-uarch.lua $(EMULATOR_DIR)/src/uarch-reset-state.cpp src/UArchReset.sol UArchReset uarchResetState + $(LUA_BIN) tools/transpile-uarch.lua $(EMULATOR_DIR)/src/send-cmio-response.cpp src/SendCmioResponse.sol SendCmioResponse sendCmioResponse + @forge fmt src/UArchStep.sol src/UArchReset.sol src/SendCmioResponse.sol + +check-gen-transpiled: + @$(LUA_BIN) tools/transpile-uarch.lua $(EMULATOR_DIR)/src/uarch-step.cpp /tmp/UArchStep.gen.sol UArchStep uarchStep + @$(LUA_BIN) tools/transpile-uarch.lua $(EMULATOR_DIR)/src/uarch-reset-state.cpp /tmp/UArchReset.gen.sol UArchReset uarchResetState + @$(LUA_BIN) tools/transpile-uarch.lua $(EMULATOR_DIR)/src/send-cmio-response.cpp /tmp/SendCmioResponse.gen.sol SendCmioResponse sendCmioResponse + @forge fmt /tmp/UArchStep.gen.sol /tmp/UArchReset.gen.sol /tmp/SendCmioResponse.gen.sol + @for f in UArchStep UArchReset SendCmioResponse; do \ + diff -u src/$$f.sol /tmp/$$f.gen.sol \ + || { echo "ERROR: src/$$f.sol is stale. Run 'make gen-transpiled' and commit."; exit 1; }; \ + done + @echo "All transpiled .sol files are in sync with the C++ sources." + +test-transpile: + $(LUA_BIN) tools/test-transpile-uarch.lua + +gen-all: gen-constants gen-transpiled +check-gen-all: check-gen-constants check-gen-transpiled test-transpile + +# One fixture set per recorder, each with its own _manifest.csv. The adversarial +# recorders tamper the valid logs, so they must run last; all recorders use +# emulator-relative paths, hence the cd into $(EMULATOR_DIR). 
+fixtures: fixtures-clean + @mkdir -p $(FIXTURES_DIR) + @cd $(EMULATOR_DIR) && eval $$($(MAKE) env) && \ + $(LUA_BIN) tests/lua/uarch-riscv-tests.lua \ + --test-path=tests/build/uarch \ + --test='$(UARCH_TEST_PATTERN)' \ + --output-dir=$(abspath $(FIXTURES_DIR))/uarch-tests \ + record_uarch_tests && \ + $(LUA_BIN) tests/lua/uarch-riscv-tests.lua \ + --test-path=tests/build/uarch \ + --test='$(UARCH_TEST_PATTERN)' \ + --output-dir=$(abspath $(FIXTURES_DIR))/uarch-tests-per-cycle \ + --per-cycle-logs \ + record_uarch_tests && \ + $(LUA_BIN) tests/lua/record-send-cmio-response.lua \ + --output-dir=$(abspath $(FIXTURES_DIR))/send-cmio-response && \ + $(LUA_BIN) tests/lua/record-reset-uarch.lua \ + --output-dir=$(abspath $(FIXTURES_DIR))/reset-uarch && \ + $(LUA_BIN) tests/lua/record-adversarial-uarch.lua \ + --fixtures-dir=$(abspath $(FIXTURES_DIR)) \ + --output-dir=$(abspath $(FIXTURES_DIR))/reject-uarch && \ + $(LUA_BIN) tests/lua/record-adversarial-send-cmio-response.lua \ + --fixtures-dir=$(abspath $(FIXTURES_DIR)) \ + --output-dir=$(abspath $(FIXTURES_DIR))/reject-send-cmio-response + +fixtures-clean: + rm -rf $(FIXTURES_DIR) + +# No forge dependency, so the top-level clean works without Foundry installed. +clean: fixtures-clean + rm -rf out cache broadcast diff --git a/solidity-step/README.md b/solidity-step/README.md new file mode 100644 index 000000000..eb0841885 --- /dev/null +++ b/solidity-step/README.md @@ -0,0 +1,25 @@ +# Cartesi step log replay - Solidity library + +Replays binary step logs (uarch step, uarch reset, send_cmio_response) on-chain. Successor to `machine-solidity-step`. The same library is used for both testing (replaying emulator-generated fixtures) and production (on-chain dispute verification by dave / rollups-contracts). + +## Quick start + +Needs Foundry; see the top-level README for setup and the pinned version. 
+ +```bash +make build # compile contracts +make fixtures # generate step-log fixtures +make test # run Foundry tests +``` + +## Keeping the generated Solidity in sync + +Some `src/` files are generated from the emulator (transpiled handlers and exported constants), +not hand-written. After changing the emulator sources they derive from, regenerate and commit: + +```bash +make gen-all # regenerate (needs a built emulator + Foundry) +make check-gen-all # check for drift without regenerating (what CI runs) +``` + +`check-gen-all` fails with a diff naming any stale file. diff --git a/solidity-step/foundry.toml b/solidity-step/foundry.toml new file mode 100644 index 000000000..bea43914b --- /dev/null +++ b/solidity-step/foundry.toml @@ -0,0 +1,18 @@ +[profile.default] +src = "src" +out = "out" +test = "test" +solc_version = "0.8.30" +evm_version = "prague" +optimizer = true +via_ir = true +fs_permissions = [{ access = "read", path = "test/fixtures" }] +# Per-cycle replay walks every cycle of every program in one outer test frame, +# accumulating bytes allocations across thousands of vm.readFileBinary calls. +# 1 GiB is comfortable headroom for the full 54-program uarch suite. +memory_limit = 1073741824 + +[fmt] +line_length = 100 +tab_width = 4 +sort_imports = true diff --git a/solidity-step/src/EmulatorConstants.sol b/solidity-step/src/EmulatorConstants.sol new file mode 100644 index 000000000..fc178c1ef --- /dev/null +++ b/solidity-step/src/EmulatorConstants.sol @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +/// @notice Constants pulled from machine-emulator C++ headers. +/// @dev GENERATED by solidity-step/tools/gen-emulator-constants.lua - do not edit by hand. +/// Re-run via `make gen-constants`; CI fails on drift. +library EmulatorConstants { + // Address-range geometry (cartesi.AR_*). 
+ uint64 internal constant AR_SHADOW_UARCH_STATE_START = 0x400000; + uint64 internal constant AR_UARCH_RAM_START = 0x600000; + + // Uarch register init values. + uint64 internal constant UARCH_PC_INIT = AR_UARCH_RAM_START; + + // Uarch shadow-state register addresses (cartesi.machine:get_reg_address). + uint64 internal constant UARCH_HALT_FLAG_ADDR = 0x400000; + uint64 internal constant UARCH_CYCLE_ADDR = 0x400008; + uint64 internal constant UARCH_PC_ADDR = 0x400010; + uint64 internal constant UARCH_X_BASE_ADDR = 0x400018; + + uint8 internal constant UARCH_X_REG_COUNT = 32; + + // Uarch cycle limit (cartesi.UARCH_CYCLE_MAX). + uint64 internal constant UARCH_CYCLE_MAX = 1048576; + + // Uarch ECALL function codes (cartesi.UARCH_ECALL_FN_*). + uint64 internal constant UARCH_ECALL_FN_HALT = 1; + uint64 internal constant UARCH_ECALL_FN_PUTCHAR = 2; + uint64 internal constant UARCH_ECALL_FN_WRITE_TLB = 4; + + // Uarch state geometry (cartesi.UARCH_STATE_LOG2_SIZE). + uint64 internal constant UARCH_STATE_START_ADDR = AR_SHADOW_UARCH_STATE_START; + uint8 internal constant UARCH_STATE_LOG2_SIZE = 22; + + // Root hash of the pristine uarch state (cartesi.UARCH_PRISTINE_STATE_HASH). + bytes32 internal constant UARCH_PRISTINE_STATE_HASH = + 0xeb629413f972768356aae2e512c2cf7cf1dc9ec00a1e2511ad580b5dbe23373b; + + // Hash tree geometry (cartesi.HASH_TREE_LOG2_*). + uint8 internal constant HASH_TREE_LOG2_WORD_SIZE = 5; + uint8 internal constant HASH_TREE_LOG2_PAGE_SIZE = 12; + uint8 internal constant HASH_TREE_LOG2_ROOT_SIZE = 64; + + // Step log header hash_function codes (cartesi.HASH_FUNCTION_*). The Solidity + // verifier implements keccak256 only; sha256 is the zkVM path (RISC0). + uint8 internal constant HASH_FUNCTION_KECCAK256 = 0; + uint8 internal constant HASH_FUNCTION_SHA256 = 1; + + // Page geometry (derived). 
+ uint256 internal constant PAGE_SIZE = 1 << HASH_TREE_LOG2_PAGE_SIZE; + uint64 internal constant PAGE_OFFSET_MASK = (uint64(1) << HASH_TREE_LOG2_PAGE_SIZE) - 1; + uint256 internal constant LEAF_SIZE = uint256(1) << HASH_TREE_LOG2_WORD_SIZE; + + // CMIO buffer geometry (cartesi.AR_CMIO_RX_BUFFER_*). + uint64 internal constant AR_CMIO_RX_BUFFER_START = 0x60000000; + uint8 internal constant AR_CMIO_RX_BUFFER_LOG2_SIZE = 21; + + // Shadow TLB layout. + uint64 internal constant AR_SHADOW_TLB_START = 0x1000; + uint8 internal constant SHADOW_TLB_SLOT_LOG2_SIZE = 5; + uint64 internal constant SHADOW_TLB_SLOT_SIZE = uint64(1) << SHADOW_TLB_SLOT_LOG2_SIZE; + uint64 internal constant TLB_SET_SIZE = 256; + uint64 internal constant SHADOW_TLB_SET_LENGTH = TLB_SET_SIZE * SHADOW_TLB_SLOT_SIZE; + + // Shadow register addresses (cartesi.machine:get_reg_address). + uint64 internal constant IFLAGS_Y_ADDRESS = 0x300; + uint64 internal constant HTIF_TOHOST_ADDRESS = 0x328; + uint64 internal constant HTIF_FROMHOST_ADDR = 0x330; + + // HTIF tohost field layout [dev:8][cmd:8][reason:16][data:32]; hand-mirrored from htif-constants.hpp. + uint32 internal constant HTIF_DEV_SHIFT = 0x38; + uint32 internal constant HTIF_CMD_SHIFT = 0x30; + uint32 internal constant HTIF_REASON_SHIFT = 0x20; + uint64 internal constant HTIF_DEV_MASK = 0xff00000000000000; + uint64 internal constant HTIF_CMD_MASK = 0xff000000000000; + uint64 internal constant HTIF_REASON_MASK = 0xffff00000000; + uint64 internal constant HTIF_DEV_YIELD = 0x2; + uint64 internal constant HTIF_YIELD_CMD_MANUAL = 0x1; + uint16 internal constant HTIF_YIELD_REASON_ADVANCE_STATE = 0; + + // Shadow revert-root-hash slot - protocol-only convention; uarch never touches it. + uint64 internal constant AR_SHADOW_REVERT_ROOT_HASH_START = 0xfe0; + + // Manual yield reasons (cartesi.HTIF_YIELD_MANUAL_REASON_*). 
+ uint16 internal constant HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED = 1; + uint16 internal constant HTIF_YIELD_MANUAL_REASON_RX_REJECTED = 2; + uint16 internal constant HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION = 4; + + // Step log signature (cartesi.STEP_LOG_SIGNATURE). + bytes8 internal constant STEP_LOG_SIGNATURE = 0x4354534903000000; +} diff --git a/solidity-step/src/HashTree.sol b/solidity-step/src/HashTree.sol new file mode 100644 index 000000000..fc9ae254b --- /dev/null +++ b/solidity-step/src/HashTree.sol @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; + +library HashTree { + error PaddedMerkleHashLog2SizeOutOfRange(uint8 log2Size); + error DataExceedsPaddedSize(uint8 log2Size, uint256 dataLength); + + function merkleTreeHash(bytes memory data, uint256 start, uint256 size) + internal + pure + returns (bytes32) + { + if (size > EmulatorConstants.LEAF_SIZE) { + uint256 half = size >> 1; + bytes32 left = merkleTreeHash(data, start, half); + bytes32 right = merkleTreeHash(data, start + half, half); + return keccak256(abi.encodePacked(left, right)); + } + bytes32 word; + assembly ("memory-safe") { + // skip 32-byte length prefix + word := mload(add(add(data, 32), start)) + } + return keccak256(abi.encodePacked(word)); + } + + /// Merkle root of `data` zero-padded to 2^totalLog2Size bytes. Subtrees entirely + /// beyond `data.length` collapse to a precomputed pristine hash; subtrees entirely + /// within fall through to `merkleTreeHash`. Only the boundary subtree at each level + /// recurses. 
+ function merkleTreeHashPadded(bytes calldata data, uint8 totalLog2Size) + internal + pure + returns (bytes32) + { + if ( + totalLog2Size < EmulatorConstants.HASH_TREE_LOG2_WORD_SIZE + || totalLog2Size >= EmulatorConstants.HASH_TREE_LOG2_ROOT_SIZE + ) { + revert PaddedMerkleHashLog2SizeOutOfRange(totalLog2Size); + } + if ((uint256(1) << totalLog2Size) < data.length) { + revert DataExceedsPaddedSize(totalLog2Size, data.length); + } + + // Pristine table is small and rebuilt per call (single caller, send_cmio_response). + uint8 leafLog2 = EmulatorConstants.HASH_TREE_LOG2_WORD_SIZE; + bytes32[] memory pristine = new bytes32[](uint256(totalLog2Size) + 1); + pristine[leafLog2] = keccak256(abi.encodePacked(bytes32(0))); + for (uint8 k = leafLog2 + 1; k <= totalLog2Size; k++) { + pristine[k] = keccak256(abi.encodePacked(pristine[k - 1], pristine[k - 1])); + } + bytes memory dataMem = data; + return merkleSubtreeHashPadded(dataMem, data.length, 0, totalLog2Size, pristine); + } + + function merkleSubtreeHashPadded( + bytes memory data, + uint256 dataLength, + uint256 start, + uint8 log2Size, + bytes32[] memory pristine + ) private pure returns (bytes32) { + if (start >= dataLength) return pristine[log2Size]; + uint256 size = uint256(1) << log2Size; + if (log2Size == EmulatorConstants.HASH_TREE_LOG2_WORD_SIZE) { + // A full in-bounds leaf goes through merkleTreeHash. A leaf straddling the + // boundary is masked to zero past dataLength (bytes-memory padding is not + // guaranteed zero); matches the C++ recorder's zeroed-buffer write. 
+ if (start + size <= dataLength) { + return merkleTreeHash(data, start, size); + } + bytes32 word; + assembly ("memory-safe") { + word := mload(add(add(data, 32), start)) + } + uint256 zeroBits = (size - (dataLength - start)) << 3; + word = bytes32((uint256(word) >> zeroBits) << zeroBits); + return keccak256(abi.encodePacked(word)); + } + if (start + size <= dataLength) return merkleTreeHash(data, start, size); + uint256 half = size >> 1; + bytes32 left = merkleSubtreeHashPadded(data, dataLength, start, log2Size - 1, pristine); + bytes32 right = + merkleSubtreeHashPadded(data, dataLength, start + half, log2Size - 1, pristine); + return keccak256(abi.encodePacked(left, right)); + } +} diff --git a/solidity-step/src/SendCmioResponse.sol b/solidity-step/src/SendCmioResponse.sol new file mode 100644 index 000000000..8ff8389dd --- /dev/null +++ b/solidity-step/src/SendCmioResponse.sol @@ -0,0 +1,86 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +/// @dev This file is generated from C++ by solidity-step/tools/transpile-uarch.lua + +pragma solidity ^0.8.30; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; +import {StateAccess} from "src/StateAccess.sol"; +import {StepLog} from "src/StepLog.sol"; + +library SendCmioResponse { + function sendCmioResponse( + StepLog.Context memory a, + bytes32 revertRootHash, + uint16 reason, + bytes calldata data, + uint32 dataLength + ) internal pure { + // This function cannot fail. When a failure is detected, the operation is a no-op instead, + // so the honest party can always log and prove the resulting state transition. + // A response to a machine that is not waiting on a manual yield is a no-op. + if (!StateAccess.readIflagsY(a)) { + return; + } + if (reason == EmulatorConstants.HTIF_YIELD_REASON_ADVANCE_STATE) { + // Advance-state responses are the input boundary of the rollups flow. They only apply to a + // machine waiting for an input on an rx-accepted manual yield. Sending one to a machine that + // yielded manual with any other reason (e.g., rejected an input or threw an exception) is a no-op. + uint64 tohost = StateAccess.readHtifTohost(a); + if ( + !StateAccess.isYieldedManualWith( + tohost, EmulatorConstants.HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED + ) + ) { + return; + } + } + // A zero length data is a valid response. We just skip writing to the rx buffer. 
+ uint32 writeLengthLog2Size = 0; + if (dataLength > 0) { + // Find the write length: the smallest power of 2 that is >= dataLength and >= tree leaf size + writeLengthLog2Size = StateAccess.uint32Log2(dataLength); + if (writeLengthLog2Size < EmulatorConstants.HASH_TREE_LOG2_WORD_SIZE) { + writeLengthLog2Size = EmulatorConstants.HASH_TREE_LOG2_WORD_SIZE; // minimum write size is the tree leaf size + } + if (StateAccess.uint32ShiftLeft(1, writeLengthLog2Size) < dataLength) { + writeLengthLog2Size += 1; + } + // A response with data that does not fit in the rx buffer is a no-op + if (writeLengthLog2Size > EmulatorConstants.AR_CMIO_RX_BUFFER_LOG2_SIZE) { + return; + } + } + // Record the machine root hash to revert to in case the response is eventually rejected. A consumer + // recovers it from the uarch-reset step log (whose reset accesses this slot) to revert to this state + // if the response is later rejected. + StateAccess.writeRevertRootHash(a, revertRootHash); + if (dataLength > 0) { + StateAccess.writeMemoryWithPadding( + a, EmulatorConstants.AR_CMIO_RX_BUFFER_START, data, dataLength, writeLengthLog2Size + ); + } + // Write data length and reason to fromhost + uint64 mask16 = StateAccess.uint64ShiftLeft(1, 16) - 1; + uint64 mask32 = StateAccess.uint64ShiftLeft(1, 32) - 1; + uint64 yieldData = StateAccess.uint64ShiftLeft((uint64(reason) & mask16), 32) + | (uint64(dataLength) & mask32); + StateAccess.writeHtifFromhost(a, yieldData); + // Reset iflags.Y + StateAccess.writeIflagsY(a, 0); + } +} diff --git a/solidity-step/src/StateAccess.sol b/solidity-step/src/StateAccess.sol new file mode 100644 index 000000000..0c67ab405 --- /dev/null +++ b/solidity-step/src/StateAccess.sol @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; +import {HashTree} from "src/HashTree.sol"; +import {StepLog} from "src/StepLog.sol"; + +/// Read/write accessors over a decoded step log. 
The transpiled uarch interpreter +/// calls these methods; names match `machine-emulator/src/uarch-solidity-compat.hpp`, +/// semantics mirror `replay-step-state-access.hpp`. +library StateAccess { + error UnalignedWordAccess(uint64 paddr); + error UarchXRegisterOutOfRange(uint8 reg); + error WriteMemoryNodeWrongSize(uint32 expectedLog2Size, uint64 nodeLog2Size); + error WriteMemoryHashMismatch(bytes32 expected, bytes32 fromLog); + error ResetUarchWrongSize(uint64 nodeLog2Size); + error ResetUarchWrongPostHash(bytes32 fromLog); + error Uint32Log2OfZero(); + /// Wraps the message from a C++ THROW(...) call site (transpiled via throwRuntimeError). + error RuntimeError(string message); + + // Wire format stores multi-byte integers little-endian; the EVM word format + // is big-endian. readWord / writeWord internalise the swap. + + function readWord(StepLog.Context memory ctx, uint64 paddr) internal pure returns (uint64) { + if (paddr & 0x7 != 0) revert UnalignedWordAccess(paddr); + uint256 ptr = StepLog.findPage(ctx, paddr); + bytes32 word32; + assembly ("memory-safe") { + word32 := mload(ptr) + } + return StepLog.swapBytes64(uint64(bytes8(word32))); + } + + function writeWord(StepLog.Context memory ctx, uint64 paddr, uint64 val) internal pure { + if (paddr & 0x7 != 0) revert UnalignedWordAccess(paddr); + uint256 ptr = StepLog.findPageForWrite(ctx, paddr); + assembly ("memory-safe") { + // Wire format is little-endian: byte 0 = LSB of val. 
+ mstore8(ptr, and(val, 0xff)) + mstore8(add(ptr, 1), and(shr(8, val), 0xff)) + mstore8(add(ptr, 2), and(shr(16, val), 0xff)) + mstore8(add(ptr, 3), and(shr(24, val), 0xff)) + mstore8(add(ptr, 4), and(shr(32, val), 0xff)) + mstore8(add(ptr, 5), and(shr(40, val), 0xff)) + mstore8(add(ptr, 6), and(shr(48, val), 0xff)) + mstore8(add(ptr, 7), and(shr(56, val), 0xff)) + } + } + + function readHaltFlag(StepLog.Context memory ctx) internal pure returns (uint64) { + return readWord(ctx, EmulatorConstants.UARCH_HALT_FLAG_ADDR); + } + + function writeHaltFlag(StepLog.Context memory ctx, uint64 val) internal pure { + writeWord(ctx, EmulatorConstants.UARCH_HALT_FLAG_ADDR, val); + } + + function readCycle(StepLog.Context memory ctx) internal pure returns (uint64) { + return readWord(ctx, EmulatorConstants.UARCH_CYCLE_ADDR); + } + + function writeCycle(StepLog.Context memory ctx, uint64 val) internal pure { + writeWord(ctx, EmulatorConstants.UARCH_CYCLE_ADDR, val); + } + + function readPc(StepLog.Context memory ctx) internal pure returns (uint64) { + return readWord(ctx, EmulatorConstants.UARCH_PC_ADDR); + } + + function writePc(StepLog.Context memory ctx, uint64 val) internal pure { + writeWord(ctx, EmulatorConstants.UARCH_PC_ADDR, val); + } + + function readX(StepLog.Context memory ctx, uint8 reg) internal pure returns (uint64) { + if (reg >= EmulatorConstants.UARCH_X_REG_COUNT) revert UarchXRegisterOutOfRange(reg); + return readWord(ctx, EmulatorConstants.UARCH_X_BASE_ADDR + uint64(reg) * 8); + } + + function writeX(StepLog.Context memory ctx, uint8 reg, uint64 val) internal pure { + if (reg >= EmulatorConstants.UARCH_X_REG_COUNT) revert UarchXRegisterOutOfRange(reg); + writeWord(ctx, EmulatorConstants.UARCH_X_BASE_ADDR + uint64(reg) * 8, val); + } + + /// Returns bool to match the transpiled body's `!readIflagsY(a)` pattern. 
+ function readIflagsY(StepLog.Context memory ctx) internal pure returns (bool) { + return readWord(ctx, EmulatorConstants.IFLAGS_Y_ADDRESS) != 0; + } + + function writeIflagsY(StepLog.Context memory ctx, uint64 val) internal pure { + writeWord(ctx, EmulatorConstants.IFLAGS_Y_ADDRESS, val); + } + + function writeHtifFromhost(StepLog.Context memory ctx, uint64 val) internal pure { + writeWord(ctx, EmulatorConstants.HTIF_FROMHOST_ADDR, val); + } + + function readHtifTohost(StepLog.Context memory ctx) internal pure returns (uint64) { + return readWord(ctx, EmulatorConstants.HTIF_TOHOST_ADDRESS); + } + + // The revert root hash occupies a full 32-byte shadow slot, stored raw (no byte swap) so the + // page bytes match the C++/RISC0 replayers. The slot ends exactly at the page boundary. + function writeRevertRootHash(StepLog.Context memory ctx, bytes32 h) internal pure { + uint256 ptr = + StepLog.findPageForWrite(ctx, EmulatorConstants.AR_SHADOW_REVERT_ROOT_HASH_START); + assembly ("memory-safe") { + mstore(ptr, h) + } + } + + function readRevertRootHash(StepLog.Context memory ctx) internal pure returns (bytes32 h) { + uint256 ptr = StepLog.findPage(ctx, EmulatorConstants.AR_SHADOW_REVERT_ROOT_HASH_START); + assembly ("memory-safe") { + h := mload(ptr) + } + } + + // Records that the reset reverted the canonical state on a rejected input. The page model recomputes + // the tree from the pages (which reflect the pristine uarch), so the revert root hash is carried on + // the context and substituted as the final root hash by Verify.verifyReset. + function revertState(StepLog.Context memory ctx) internal pure { + ctx.reverted = true; + ctx.revertedRootHash = readRevertRootHash(ctx); + } + + // True when the tohost word holds a manual yield with the given reason. Mirrors isYieldedManualWith. 
+    function isYieldedManualWith(uint64 tohost, uint64 yieldReason) internal pure returns (bool) {
+        // Isolate each tohost field with its mask, then shift it down to bit 0.
+        uint64 dev = uint64ShiftRight(
+            tohost & EmulatorConstants.HTIF_DEV_MASK, EmulatorConstants.HTIF_DEV_SHIFT
+        );
+        uint64 cmd = uint64ShiftRight(
+            tohost & EmulatorConstants.HTIF_CMD_MASK, EmulatorConstants.HTIF_CMD_SHIFT
+        );
+        uint64 reason = uint64ShiftRight(
+            tohost & EmulatorConstants.HTIF_REASON_MASK, EmulatorConstants.HTIF_REASON_SHIFT
+        );
+        // All three fields must match: yield device, manual command, requested reason.
+        return dev == EmulatorConstants.HTIF_DEV_YIELD
+            && cmd == EmulatorConstants.HTIF_YIELD_CMD_MANUAL && reason == yieldReason;
+    }
+
+    /// `dataLength` claims more bytes than `data` actually holds.
+    error WriteMemoryDataTooShort(uint64 dataLength, uint256 available);
+
+    /// Replays a write of the first `dataLength` bytes of `data` at `paddr`, zero-padded to
+    /// 2^writeLengthLog2Size bytes.
+    ///
+    /// Writes of at most one page are applied to the logged page in place. Larger writes are not
+    /// materialised: the padded Merkle root of the payload is checked against the `hashAfter` of
+    /// the node logged at `paddr`, and that node is counted as consumed.
+    ///
+    /// @param ctx decoded step log being replayed
+    /// @param paddr start address of the write
+    /// @param data buffer holding the payload
+    /// @param dataLength number of payload bytes; must not exceed `data.length`
+    /// @param writeLengthLog2Size log2 of the padded write size
+    function writeMemoryWithPadding(
+        StepLog.Context memory ctx,
+        uint64 paddr,
+        bytes calldata data,
+        uint64 dataLength,
+        uint32 writeLengthLog2Size
+    ) internal pure {
+        // `dataLength` is the authoritative payload size. Bound it by the buffer once and use the
+        // same slice on both paths: otherwise the sub-page copy could read calldata past the end
+        // of `data`, while the supra-page hash would cover `data.length` bytes instead.
+        if (dataLength > data.length) revert WriteMemoryDataTooShort(dataLength, data.length);
+        bytes calldata payload = data[:dataLength];
+
+        if (writeLengthLog2Size <= EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE) {
+            // Mirrors copy_n + fill_n in do_write_memory_with_padding (replay-step-state-access.hpp).
+            // NOTE(review): nothing here checks that paddr + 2^writeLengthLog2Size stays inside the
+            // page; the only caller visible in this change passes AR_CMIO_RX_BUFFER_START, which is
+            // page-aligned. Confirm for any new caller.
+            uint256 dstPtr = StepLog.findPageForWrite(ctx, paddr);
+            uint256 len = payload.length;
+            // Checked subtraction: panics if the payload is larger than the padded write size.
+            uint256 padLen = (uint256(1) << writeLengthLog2Size) - len;
+            assembly ("memory-safe") {
+                calldatacopy(dstPtr, payload.offset, len)
+                // calldatacopy from offset >= calldatasize() zero-fills (EVM rule), giving free zero padding.
+                if padLen { calldatacopy(add(dstPtr, len), calldatasize(), padLen) }
+            }
+            return;
+        }
+        // Supra-page: the write spans more than one page, so the recorder did not
+        // include the affected pages; it summarised them as a single node entry.
+        bytes32 paddedHash = HashTree.merkleTreeHashPadded(payload, uint8(writeLengthLog2Size));
+        StepLog.NodeEntry memory n = StepLog.findNode(ctx, paddr);
+        // Compare at the wider width so the logged size is never truncated before the check.
+        if (n.log2Size != writeLengthLog2Size) {
+            revert WriteMemoryNodeWrongSize(writeLengthLog2Size, n.log2Size);
+        }
+        if (n.hashAfter != paddedHash) revert WriteMemoryHashMismatch(paddedHash, n.hashAfter);
+        // Mark the node as justified by a semantic write (see StepLog.checkAllNodesConsumed).
+        ctx.consumedNodes++;
+    }
+
+    /// Mirrors do_reset_uarch in uarch-replay-step-state-access.hpp.
+    function resetState(StepLog.Context memory ctx) internal pure {
+        // The reset is witnessed by a single node covering the whole uarch state region.
+        StepLog.NodeEntry memory uarchNode =
+            StepLog.findNode(ctx, EmulatorConstants.UARCH_STATE_START_ADDR);
+
+        uint64 loggedLog2Size = uarchNode.log2Size;
+        if (loggedLog2Size != EmulatorConstants.UARCH_STATE_LOG2_SIZE) {
+            revert ResetUarchWrongSize(loggedLog2Size);
+        }
+
+        // After a reset the region must hash to the pristine uarch state.
+        bytes32 loggedPostHash = uarchNode.hashAfter;
+        if (loggedPostHash != EmulatorConstants.UARCH_PRISTINE_STATE_HASH) {
+            revert ResetUarchWrongPostHash(loggedPostHash);
+        }
+
+        ctx.consumedNodes += 1;
+    }
+
+    // ECALL helpers - match the no-op / write shape of the C++ replay accessor.
+
+    /// Replay has nowhere to print to, so this does nothing; mirrors do_putchar
+    /// (returns false, write goes nowhere).
+    function putCharECALL(StepLog.Context memory, uint8) internal pure {}
+
+    /// Writes one shadow TLB slot as four consecutive 64-bit words.
+    /// Slot layout (per shadow_tlb_slot in machine-emulator/src/shadow-tlb.hpp):
+    ///   offset  0: vaddr_page    (uint64)
+    ///   offset  8: vp_offset     (uint64)
+    ///   offset 16: pma_index     (uint64)
+    ///   offset 24: zero_padding_ (always 0)
+    function writeTlbECALL(
+        StepLog.Context memory ctx,
+        uint64 setIndex,
+        uint64 slotIndex,
+        uint64 vaddrPage,
+        uint64 vpOffset,
+        uint64 pmaIndex
+    ) internal pure {
+        // Checked arithmetic throughout: an out-of-range index panics instead of wrapping.
+        uint64 setOffset = setIndex * EmulatorConstants.SHADOW_TLB_SET_LENGTH;
+        uint64 slotOffset = slotIndex * EmulatorConstants.SHADOW_TLB_SLOT_SIZE;
+        uint64 slotAddr = EmulatorConstants.AR_SHADOW_TLB_START + setOffset + slotOffset;
+
+        // Fields in slot order; the last word is the zero padding.
+        uint64[4] memory fields = [vaddrPage, vpOffset, pmaIndex, 0];
+        for (uint64 i = 0; i < 4; i++) {
+            writeWord(ctx, slotAddr + i * 8, fields[i]);
+        }
+    }
+
+    /// Aborts the replay with the given message wrapped in RuntimeError.
+    function throwRuntimeError(StepLog.Context memory, string memory message) internal pure {
+        revert RuntimeError(message);
+    }
+
+    // Pure math helpers - transpiler bridge to uarch-solidity-compat.hpp.
+    // `unchecked` everywhere to preserve C++ wraparound semantics.
+
+    /// Floor of log2(v); reverts with Uint32Log2OfZero when v is zero.
+    function uint32Log2(uint32 v) internal pure returns (uint32 log2Floor) {
+        if (v == 0) revert Uint32Log2OfZero();
+        // Binary search for the highest set bit: try strides of 16, 8, 4, 2 and 1.
+        for (uint32 stride = 16; stride != 0; stride >>= 1) {
+            if ((v >> stride) != 0) {
+                v >>= stride;
+                log2Floor += stride;
+            }
+        }
+    }
+
+    /// v + sign-extended w, wrapping modulo 2^64.
+    function uint64AddInt32(uint64 v, int32 w) internal pure returns (uint64 sum) {
+        uint64 extended = uint64(int64(w));
+        unchecked {
+            sum = v + extended;
+        }
+    }
+
+    /// v + w, wrapping modulo 2^64.
+    function uint64AddUint64(uint64 v, uint64 w) internal pure returns (uint64 sum) {
+        unchecked {
+            sum = v + w;
+        }
+    }
+
+    /// v - w, wrapping modulo 2^64.
+    function uint64SubUint64(uint64 v, uint64 w) internal pure returns (uint64 difference) {
+        unchecked {
+            difference = v - w;
+        }
+    }
+
+    /// Logical right shift; only the low 6 bits of `count` are used.
+    function uint64ShiftRight(uint64 v, uint32 count) internal pure returns (uint64 shifted) {
+        uint32 amount = count & 0x3f;
+        shifted = v >> amount;
+    }
+
+    /// Left shift; only the low 6 bits of `count` are used.
+    function uint64ShiftLeft(uint64 v, uint32 count) internal pure returns (uint64 shifted) {
+        uint32 amount = count & 0x3f;
+        unchecked {
+            shifted = v << amount;
+        }
+    }
+
+    /// Arithmetic (sign-propagating) right shift; only the low 6 bits of `count` are used.
+    function int64ShiftRight(int64 v, uint32 count) internal pure returns (int64 shifted) {
+        uint32 amount = count & 0x3f;
+        shifted = v >> amount;
+    }
+
+    /// v + w, wrapping on signed 64-bit overflow.
+    function int64AddInt64(int64 v, int64 w) internal pure returns (int64 sum) {
+        unchecked {
+            sum = v + w;
+        }
+    }
+
+    /// Logical right shift; only the low 5 bits of `count` are used.
+    function uint32ShiftRight(uint32 v, uint32 count) internal pure returns (uint32 shifted) {
+        uint32 amount = count & 0x1f;
+        shifted = v >> amount;
+    }
+
+    /// Left shift; only the low 5 bits of `count` are used.
+    function uint32ShiftLeft(uint32 v, uint32 count) internal pure returns (uint32 shifted) {
+        uint32 amount = count & 0x1f;
+        unchecked {
+            shifted = v << amount;
+        }
+    }
+
+    /// Arithmetic (sign-propagating) right shift; only the low 5 bits of `count` are used.
+    function int32ShiftRight(int32 v, uint32 count) internal pure returns (int32 shifted) {
+        uint32 amount = count & 0x1f;
+        shifted = v >> amount;
+    }
+
+    /// v + w, wrapping on signed 32-bit overflow.
+    function int32AddInt32(int32 v, int32 w) internal pure returns (int32 sum) {
+        unchecked {
+            sum = v + w;
+        }
+    }
+
+    /// v - w, wrapping on signed 32-bit overflow.
+    function int32SubInt32(int32 v, int32 w) internal pure returns (int32 difference) {
+        unchecked {
+            difference = v - w;
+        }
+    }
+
+    /// C++ static_cast<int32_t>(uint64_t): low 32 bits reinterpreted as signed.
+ function uint64ToInt32(uint64 v) internal pure returns (int32) { + return int32(uint32(v)); + } + + function int32ToUint64(int32 v) internal pure returns (uint64) { + return uint64(int64(v)); + } + + function int16ToUint64(int16 v) internal pure returns (uint64) { + return uint64(int64(v)); + } + + function int8ToUint64(int8 v) internal pure returns (uint64) { + return uint64(int64(v)); + } +} diff --git a/solidity-step/src/StepLog.sol b/solidity-step/src/StepLog.sol new file mode 100644 index 000000000..2fb0652cb --- /dev/null +++ b/solidity-step/src/StepLog.sol @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; +import {HashTree} from "src/HashTree.sol"; + +/// Binary step log decoder. +library StepLog { + error HeaderTruncated(); + error InvalidSignature(); + error UnsupportedHashFunction(uint64 code); + error LogTruncated(); + /// A valid step log always witnesses at least one page; a zero page count is malformed. + error PageCountZero(); + error PagesNotInOrder(); + /// A page entry's scratch_hash field is reserved and must be zero on the wire. + error NonZeroScratchHash(); + error NodeLog2SizeOutOfRange(uint64 log2Size); + error NodeNotAligned(uint64 addr, uint64 log2Size); + error OverlappingEntries(); + error TooManyPages(); + error TooManyNodes(); + error TooManySiblings(); + error TooFewSiblings(); + error PageCountExceedsLimit(uint64 declared); + error NodeCountExceedsLimit(uint64 declared); + error SiblingCountExceedsLimit(uint64 declared); + error RequiredPageNotFound(uint64 pageIdx); + error RequiredNodeNotFound(uint64 addr); + /// Merkle-integrity check on the pre-state: recomputed root from pages+nodes + /// +siblings did not match the header's rootHashBefore. + error InitialRootHashMismatch(); + /// decode found bytes left over after the step log body. 
+ error TrailingBytes(uint256 consumed, uint256 length); + /// A node's hashAfter is folded into the post-state root verbatim, so every node + /// must be produced by a semantic write; some node was never consumed by the replay. + error UnconsumedNodes(uint256 consumed, uint256 total); + + /// Decoded step log. Fields are unverified wire claims until Verify.verifyXXX runs computeRootHash. + struct Context { + bytes32 rootHashBefore; + uint64 requestedCycleCount; + bytes32 rootHashAfter; + uint8 hashFunction; + uint64[] pageIndices; // strictly ascending + bytes pageData; // pageCount * PAGE_SIZE bytes; page i at offset i * PAGE_SIZE + bytes32[] pageHashes; // filled by computeRootHash + NodeEntry[] nodes; // internal nodes covering regions > PAGE_SIZE; strictly ascending by addr, no overlaps with pages + bytes32[] siblings; // hash values for subtrees not covered by pages or nodes; left-to-right order + uint256 consumedNodes; // count of nodes a semantic write looked up during replay; see computeRootHash + bool reverted; // set when uarch reset reverted the state on a rejected input; see Verify.verifyReset + bytes32 revertedRootHash; // canonical post-state hash when reverted (the recorded revert root hash) + } + + uint256 internal constant HASH_SIZE = 32; + uint256 internal constant U64_SIZE = 8; + + uint256 internal constant HEADER_SIZE = 8 // signature + + HASH_SIZE // rootHashBefore + + U64_SIZE // requestedCycleCount + + HASH_SIZE // rootHashAfter + + U64_SIZE // hashFunction + + U64_SIZE // pageCount + + U64_SIZE // nodeCount + + U64_SIZE; // siblingCount + + uint256 internal constant PAGE_ENTRY_SIZE = U64_SIZE // page index + + EmulatorConstants.PAGE_SIZE // data + + HASH_SIZE; // scratch_hash + + /// Whole-subtree update encoded as pre/post hashes. 
+ struct NodeEntry { + uint64 addr; // subtree start address + uint64 log2Size; // log2 of subtree size (> page-log2, <= root-log2) + bytes32 hashBefore; // subtree root hash before the step + bytes32 hashAfter; // subtree root hash after the step + } + + uint256 internal constant NODE_ENTRY_SIZE = U64_SIZE // addr + + U64_SIZE // log2_size + + HASH_SIZE // hash_before + + HASH_SIZE; // hash_after + + // Caps for one uarch-granularity log (step/reset/cmio), the only logs verified here. + // A uarch step touches <=3 pages (shadow+fetch+data), reset/cmio <=1 node (corpus max 3/1/69). + // Sibling cap must stay >= 52 (tree depth) * MAX_PAGE_COUNT so a maximally-spread log is not + // wrongly rejected. + uint64 internal constant MAX_PAGE_COUNT = 8; + uint64 internal constant MAX_NODE_COUNT = 4; + uint64 internal constant MAX_SIBLING_COUNT = 512; + + /// Decode a standalone step log that must occupy the entire buffer; reverts on + /// trailing bytes. Use `decodeAt` for multi-log cursor composition. + function decode(bytes calldata data) internal pure returns (Context memory ctx) { + uint256 newOffset; + (ctx, newOffset) = decodeAt(data, 0); + if (newOffset != data.length) revert TrailingBytes(newOffset, data.length); + } + + /// Decode + structurally validate the step log at `offset`, verify its pre-state + /// Merkle root, and return the offset just past it (cursor primitive for multi-log + /// composition). The returned `rootHashBefore` is trustworthy; the post-state and + /// caller-belief checks still happen in Verify.verifyXXX. + function decodeAt(bytes calldata data, uint256 offset) + internal + pure + returns (Context memory ctx, uint256 newOffset) + { + // Subtraction-style bounds: offset is caller-supplied, so offset + HEADER_SIZE + // could overflow and panic before this revert. Compare against the remaining + // length instead. 
+ if (offset > data.length || data.length - offset < HEADER_SIZE) revert HeaderTruncated(); + uint256 cursor = offset; + + if (bytes8(data[cursor:cursor + 8]) != EmulatorConstants.STEP_LOG_SIGNATURE) { + revert InvalidSignature(); + } + cursor += 8; + + ctx.rootHashBefore = bytes32(data[cursor:cursor + HASH_SIZE]); + cursor += HASH_SIZE; + + ctx.requestedCycleCount = readLE64(data, cursor); + cursor += U64_SIZE; + + ctx.rootHashAfter = bytes32(data[cursor:cursor + HASH_SIZE]); + cursor += HASH_SIZE; + + // This verifier implements keccak256 only + uint64 hashFn = readLE64(data, cursor); + cursor += U64_SIZE; + if (hashFn != EmulatorConstants.HASH_FUNCTION_KECCAK256) { + revert UnsupportedHashFunction(hashFn); + } + ctx.hashFunction = uint8(hashFn); + + uint64 pageCount = readLE64(data, cursor); + cursor += U64_SIZE; + uint64 nodeCount = readLE64(data, cursor); + cursor += U64_SIZE; + uint64 siblingCount = readLE64(data, cursor); + cursor += U64_SIZE; + + if (pageCount > MAX_PAGE_COUNT) revert PageCountExceedsLimit(pageCount); + if (pageCount == 0) revert PageCountZero(); + if (nodeCount > MAX_NODE_COUNT) revert NodeCountExceedsLimit(nodeCount); + if (siblingCount > MAX_SIBLING_COUNT) revert SiblingCountExceedsLimit(siblingCount); + + // Counts are capped (MAX_*_COUNT), so total cannot overflow; offset <= data.length + // holds from the header bound above, so the subtraction is safe. 
+ uint256 total = HEADER_SIZE + uint256(pageCount) * PAGE_ENTRY_SIZE + + uint256(nodeCount) * NODE_ENTRY_SIZE + uint256(siblingCount) * HASH_SIZE; + if (data.length - offset < total) revert LogTruncated(); + + ctx.pageIndices = new uint64[](pageCount); + ctx.pageData = new bytes(uint256(pageCount) * EmulatorConstants.PAGE_SIZE); + ctx.pageHashes = new bytes32[](pageCount); + + for (uint64 i = 0; i < pageCount; i++) { + uint64 idx = readLE64(data, cursor); + cursor += U64_SIZE; + if (i > 0 && idx <= ctx.pageIndices[i - 1]) revert PagesNotInOrder(); + ctx.pageIndices[i] = idx; + + copyPageData(data, cursor, ctx.pageData, i); + cursor += EmulatorConstants.PAGE_SIZE; + + if (bytes32(data[cursor:cursor + HASH_SIZE]) != bytes32(0)) revert NonZeroScratchHash(); + cursor += HASH_SIZE; + } + + ctx.nodes = new NodeEntry[](nodeCount); + for (uint64 i = 0; i < nodeCount; i++) { + NodeEntry memory n; + + n.addr = readLE64(data, cursor); + cursor += U64_SIZE; + + uint64 log2Size = readLE64(data, cursor); + cursor += U64_SIZE; + if ( + log2Size <= EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE + || log2Size > EmulatorConstants.HASH_TREE_LOG2_ROOT_SIZE + ) revert NodeLog2SizeOutOfRange(log2Size); + n.log2Size = log2Size; + // alignment: addr % (1<= pageCnt) { + takePage = false; + } else if (ni >= nodeCnt) { + takePage = true; + } else { + uint256 pageStart = + uint256(ctx.pageIndices[pi]) << EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE; + takePage = pageStart < ctx.nodes[ni].addr; + } + if (takePage) { + entryStart = + uint256(ctx.pageIndices[pi]) << EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE; + entryEnd = entryStart + pageSize; + pi++; + } else { + entryStart = ctx.nodes[ni].addr; + entryEnd = entryStart + (uint256(1) << ctx.nodes[ni].log2Size); + ni++; + } + if (entryStart < prevEnd) revert OverlappingEntries(); + prevEnd = entryEnd; + } + } + + /// Hashes each page lazily into ctx.pageHashes, then folds the tree. 
+ /// `useAfter` picks each node's hashAfter (true) or hashBefore (false). + /// A zero pageHashes slot means "needs hashing": slots start zero, and every write zeroes the + /// written page's slot (findPageForWrite). So the pre-state call hashes all pages and the + /// post-state call rehashes only the pages the operation wrote; clean pages keep the hash the + /// pre-state call already validated against rootHashBefore, byte-identical after the step. + function computeRootHash(Context memory ctx, bool useAfter) internal pure returns (bytes32) { + uint256 pageCnt = ctx.pageIndices.length; + for (uint256 i = 0; i < pageCnt; i++) { + if (ctx.pageHashes[i] == bytes32(0)) { + ctx.pageHashes[i] = HashTree.merkleTreeHash( + ctx.pageData, i * EmulatorConstants.PAGE_SIZE, EmulatorConstants.PAGE_SIZE + ); + } + } + TreeWalkCursors memory c; + bytes32 root = computeSubtreeHash( + ctx, + c, + 0, // start page index + uint8( + EmulatorConstants.HASH_TREE_LOG2_ROOT_SIZE + - EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE + ), // log2PageCount + useAfter + ); + if (c.nextPage != pageCnt) revert TooManyPages(); + if (c.nextNode != ctx.nodes.length) revert TooManyNodes(); + if (c.nextSibling != ctx.siblings.length) revert TooManySiblings(); + if (useAfter) { + checkAllNodesConsumed(ctx); + } + return root; + } + + struct TreeWalkCursors { + uint256 nextPage; + uint256 nextNode; + uint256 nextSibling; + } + + /// Recursively computes the Merkle hash of one subtree, descending until each + /// covered region resolves to a logged page, node, or sibling. Returns that hash. 
+    /// @param ctx decoded step log supplying the pages, nodes and siblings
+    /// @param c page/node/sibling cursors, advanced in place as entries are consumed
+    /// @param pageIndex index of the first page covered by this subtree
+    /// @param log2PageCount log2 of the number of pages this subtree covers (0 = a single page)
+    /// @param useAfter pick each matched node's hashAfter (post-state) over hashBefore (pre-state)
+    function computeSubtreeHash(
+        Context memory ctx,
+        TreeWalkCursors memory c,
+        uint64 pageIndex,
+        uint8 log2PageCount,
+        bool useAfter
+    ) private pure returns (bytes32) {
+        uint256 subtreeStartAddr = uint256(pageIndex) << EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE;
+        uint8 subtreeLog2Size = log2PageCount + EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE;
+        // Exclusive upper bound, in pages, of the range this subtree covers.
+        uint64 subtreeEndPageIndex = pageIndex + (uint64(1) << log2PageCount);
+
+        // Only the upper bound is tested for the next unconsumed page/node.
+        // NOTE(review): this presumably relies on entries being sorted and non-overlapping and on
+        // the left-to-right walk, so the next entry never starts before this subtree - confirm
+        // against the decode-time validation.
+        bool pageIn =
+            c.nextPage < ctx.pageIndices.length && ctx.pageIndices[c.nextPage] < subtreeEndPageIndex;
+        bool nodeIn = c.nextNode < ctx.nodes.length
+            && (ctx.nodes[c.nextNode].addr >> EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE)
+                < subtreeEndPageIndex;
+
+        if (!pageIn && !nodeIn) {
+            // No page or node falls in this subtree, so its hash is the next sibling from the
+            // log; a log that has run out of siblings is rejected.
+            if (c.nextSibling >= ctx.siblings.length) revert TooFewSiblings();
+            return ctx.siblings[c.nextSibling++];
+        }
+
+        if (
+            nodeIn && ctx.nodes[c.nextNode].addr == subtreeStartAddr
+                && ctx.nodes[c.nextNode].log2Size == subtreeLog2Size
+        ) {
+            // The subtree is fully covered by a node; consume it and return its hash.
+            NodeEntry memory n = ctx.nodes[c.nextNode++];
+            return useAfter ? n.hashAfter : n.hashBefore;
+        }
+
+        if (log2PageCount > 0) {
+            // The subtree is partially covered by pages/nodes, so recurse to the left and right halves.
+            // Left is evaluated before right so the shared cursors advance in address order.
+            bytes32 left = computeSubtreeHash(ctx, c, pageIndex, log2PageCount - 1, useAfter);
+            uint64 halfwayPageIndex = pageIndex + (uint64(1) << (log2PageCount - 1));
+            bytes32 right =
+                computeSubtreeHash(ctx, c, halfwayPageIndex, log2PageCount - 1, useAfter);
+            return keccak256(abi.encodePacked(left, right));
+        }
+        // Leaf: must be a page (nodes have log2Size > HASH_TREE_LOG2_PAGE_SIZE).
+        // The hash comes from ctx.pageHashes, which computeRootHash fills before the walk.
+        return ctx.pageHashes[c.nextPage++];
+    }
+
+    /// Assert every witnessed node was consumed by a semantic write during replay.
+    /// Post-state soundness: each node's hashAfter is taken from the wire and folded into
+    /// rootHashAfter, so an unconsumed node would inject an arbitrary post-state subtree. Every node
+    /// must have been looked up by a semantic write (reset/cmio); a uarch step writes only pages, so it
+    /// must carry no nodes at all. computeRootHash(true) calls this; a reverted operation substitutes a
+    /// recorded root instead of recomputing it, so it must call this explicitly to keep the guarantee.
+    /// Reverts with UnconsumedNodes when ctx.consumedNodes differs from the number of logged nodes.
+    function checkAllNodesConsumed(Context memory ctx) internal pure {
+        if (ctx.consumedNodes != ctx.nodes.length) {
+            revert UnconsumedNodes(ctx.consumedNodes, ctx.nodes.length);
+        }
+    }
+
+    /// Returns a raw memory pointer to byte `paddr` inside ctx.pageData. Callers consume the pointer
+    /// immediately in an assembly block; safe because `bytes memory` never moves. Read paths use this;
+    /// write paths use findPageForWrite so the page is rehashed on the post-state root.
+    /// Reverts (via locatePage) when the page containing `paddr` is not in the log.
+    function findPage(Context memory ctx, uint64 paddr) internal pure returns (uint256 memPtr) {
+        // The page slot index is only needed by the write path, so it is discarded here.
+        (memPtr,) = locatePage(ctx, paddr);
+    }
+
+    /// Like findPage, but invalidates the page's cached hash (zeroes its pageHashes slot) so
+    /// computeRootHash rehashes it on the post-state pass. Every path that mutates pageData resolves
+    /// through here.
+    function findPageForWrite(Context memory ctx, uint64 paddr)
+        internal
+        pure
+        returns (uint256 memPtr)
+    {
+        // One lookup yields both the byte pointer and the page's slot; clearing the slot's hash
+        // marks the page as needing a rehash.
+        (uint256 ptr, uint256 slot) = locatePage(ctx, paddr);
+        ctx.pageHashes[slot] = bytes32(0);
+        return ptr;
+    }
+
+    /// Binary search over ctx.pageIndices for the page that contains `paddr`.
+    /// NOTE(review): the search relies on ctx.pageIndices being sorted ascending; that ordering is
+    /// established outside this function.
+    /// Reverts with RequiredPageNotFound when the page index is not present.
+    /// @return memPtr pointer to byte `paddr` inside ctx.pageData, past its 32-byte length word
+    /// @return pageIdx position of the page within ctx.pageIndices
+    function locatePage(Context memory ctx, uint64 paddr)
+        private
+        pure
+        returns (uint256 memPtr, uint256 pageIdx)
+    {
+        uint64 wanted = paddr >> EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE;
+        uint256 count = ctx.pageIndices.length;
+
+        // Narrow [first, last) until `first` is the earliest slot whose index is >= wanted.
+        uint256 first = 0;
+        uint256 last = count;
+        while (first < last) {
+            uint256 probe = (first + last) >> 1;
+            if (ctx.pageIndices[probe] >= wanted) {
+                last = probe;
+            } else {
+                first = probe + 1;
+            }
+        }
+
+        bool found = first < count && ctx.pageIndices[first] == wanted;
+        if (!found) {
+            revert RequiredPageNotFound(wanted);
+        }
+        pageIdx = first;
+
+        // Pages sit back to back in pageData: slot * PAGE_SIZE plus the offset inside the page.
+        uint256 offsetInPage = uint256(paddr) & uint256(EmulatorConstants.PAGE_OFFSET_MASK);
+        uint256 byteOff = first * EmulatorConstants.PAGE_SIZE + offsetInPage;
+        bytes memory pages = ctx.pageData;
+        assembly ("memory-safe") {
+            // 32 skips the length word of the bytes array
+            memPtr := add(pages, add(32, byteOff))
+        }
+    }
+
+    // Find a node by its subtree start address.
+    function findNode(Context memory ctx, uint64 addr) internal pure returns (NodeEntry memory) {
+        // Linear scan in log order; the first node whose addr matches is returned.
+        uint256 k = 0;
+        while (k < ctx.nodes.length) {
+            NodeEntry memory candidate = ctx.nodes[k];
+            if (candidate.addr == addr) {
+                return candidate;
+            }
+            k++;
+        }
+        revert RequiredNodeNotFound(addr);
+    }
+
+    /// Reads the 8 bytes at `data[off..off+8)` as a little-endian uint64.
+    function readLE64(bytes calldata data, uint256 off) private pure returns (uint64) {
+        // bytes8 -> uint64 interprets the slice big-endian, so reverse the byte order afterwards.
+        bytes8 raw = bytes8(data[off:off + 8]);
+        return swapBytes64(uint64(raw));
+    }
+
+    /// Copies PAGE_SIZE bytes from `data` at `srcOff` into slot `pageIdx` of `dst`.
+    /// No bounds are checked here; NOTE(review): callers presumably validate `srcOff` and size `dst`.
+    function copyPageData(bytes calldata data, uint256 srcOff, bytes memory dst, uint64 pageIdx)
+        private
+        pure
+    {
+        uint256 pageBytes = EmulatorConstants.PAGE_SIZE;
+        uint256 dstOff = uint256(pageIdx) * EmulatorConstants.PAGE_SIZE;
+        assembly ("memory-safe") {
+            // skip 32-byte length prefix
+            let target := add(dst, add(dstOff, 32))
+            calldatacopy(target, add(srcOff, data.offset), pageBytes)
+        }
+    }
+
+    /// Reverses the byte order of a 64-bit word.
+    function swapBytes64(uint64 v) internal pure returns (uint64) {
+        // Swap adjacent bytes, then adjacent 16-bit pairs, then the two 32-bit halves.
+        v = ((v & 0xff00ff00ff00ff00) >> 8) | ((v & 0x00ff00ff00ff00ff) << 8);
+        v = ((v & 0xffff0000ffff0000) >> 16) | ((v & 0x0000ffff0000ffff) << 16);
+        return (v >> 32) | (v << 32);
+    }
+}
diff --git a/solidity-step/src/UArchReset.sol b/solidity-step/src/UArchReset.sol
new file mode 100644
index 000000000..fc60893fa
--- /dev/null
+++ b/solidity-step/src/UArchReset.sol
@@ -0,0 +1,42 @@
+// Copyright Cartesi and individual authors (see AUTHORS)
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+/// @dev This file is generated from C++ by solidity-step/tools/transpile-uarch.lua
+
+pragma solidity ^0.8.30;
+
+import {EmulatorConstants} from "src/EmulatorConstants.sol";
+import {StateAccess} from "src/StateAccess.sol";
+import {StepLog} from "src/StepLog.sol";
+
+/// Microarchitecture reset, replayed against a step-log context through StateAccess.
+library UArchReset {
+    /// Resets the uarch state via StateAccess.resetState. Then, only if the IFLAGS_Y word is non-zero
+    /// and the HTIF tohost word is a manual yield with reason RX_REJECTED, calls
+    /// StateAccess.revertState.
+    /// @param a step-log context that every state access below reads from and writes to
+    function uarchResetState(StepLog.Context memory a) internal pure {
+        StateAccess.resetState(a);
+        // When the machine has rejected an input, the canonical state after the operation is
+        // the one recorded in the revert root hash (which has a pristine uarch)
+        uint64 iflagsY = StateAccess.readWord(a, EmulatorConstants.IFLAGS_Y_ADDRESS);
+        // tohost is only read when the Y flag is set, so a non-yielded machine costs one word read
+        if (iflagsY != 0) {
+            uint64 tohost = StateAccess.readWord(a, EmulatorConstants.HTIF_TOHOST_ADDRESS);
+            if (
+                StateAccess.isYieldedManualWith(
+                    tohost, EmulatorConstants.HTIF_YIELD_MANUAL_REASON_RX_REJECTED
+                )
+            ) {
+                StateAccess.revertState(a);
+            }
+        }
+    }
+}
diff --git a/solidity-step/src/UArchStep.sol b/solidity-step/src/UArchStep.sol
new file mode 100644
index 000000000..7345d17ba
--- /dev/null
+++ b/solidity-step/src/UArchStep.sol
@@ -0,0 +1,1042 @@
+// Copyright Cartesi and individual authors (see AUTHORS)
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// + +/// @dev This file is generated from C++ by solidity-step/tools/transpile-uarch.lua + +pragma solidity ^0.8.30; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; +import {StateAccess} from "src/StateAccess.sol"; +import {StepLog} from "src/StepLog.sol"; + +library UArchStep { + enum UArchStepStatus { + Success, // one micro instruction was executed successfully + CycleOverflow, // already at fixed point: uarch cycle has reached its maximum value + UArchHalted // already at fixed point: microarchitecture is halted + + } + + // Memory read/write access + + function readUint64(StepLog.Context memory a, uint64 paddr) private pure returns (uint64) { + require((paddr & 7) == 0, "misaligned readUint64 address"); + return StateAccess.readWord(a, paddr); + } + + function readUint32(StepLog.Context memory a, uint64 paddr) private pure returns (uint32) { + require((paddr & 3) == 0, "misaligned readUint32 address"); + uint64 palign = paddr & ~uint64(7); + uint32 bitoffset = StateAccess.uint32ShiftLeft(uint32(paddr) & uint32(7), 3); + uint64 val64 = readUint64(a, palign); + return uint32(StateAccess.uint64ShiftRight(val64, bitoffset)); + } + + function readUint16(StepLog.Context memory a, uint64 paddr) private pure returns (uint16) { + require((paddr & 1) == 0, "misaligned readUint16 address"); + uint64 palign = paddr & ~uint64(7); + uint32 bitoffset = StateAccess.uint32ShiftLeft(uint32(paddr) & uint32(7), 3); + uint64 val64 = readUint64(a, palign); + return uint16(StateAccess.uint64ShiftRight(val64, bitoffset)); + } + + function readUint8(StepLog.Context memory a, uint64 paddr) private pure returns (uint8) { + uint64 palign = paddr & ~uint64(7); + uint32 bitoffset = StateAccess.uint32ShiftLeft(uint32(paddr) & uint32(7), 3); + uint64 val64 = readUint64(a, palign); + return uint8(StateAccess.uint64ShiftRight(val64, bitoffset)); + } + + function writeUint64(StepLog.Context memory a, uint64 paddr, uint64 val) private pure { + require((paddr & 7) == 0, 
"misaligned writeUint64 address"); + StateAccess.writeWord(a, paddr, val); + } + + /// \brief Copies bits from a uint64 word, starting at bit 0, to another uint64 word at the specified bit offset. + /// \param from Source of bits to copy, starting at offset 0. + /// \param count Number of bits to copy. + /// \param to Destination of copy. + /// \param offset Bit offset in destination to copy bits to. + /// \return The uint64 word containing the copy result. + function copyBits(uint32 from, uint32 count, uint64 to, uint32 offset) + private + pure + returns (uint64) + { + require(offset + count <= 64, "copyBits count exceeds limit of 64"); + uint64 eraseMask = StateAccess.uint64ShiftLeft(1, count) - 1; + eraseMask = ~StateAccess.uint64ShiftLeft(eraseMask, offset); + return StateAccess.uint64ShiftLeft(from, offset) | (to & eraseMask); + } + + function writeUint32(StepLog.Context memory a, uint64 paddr, uint32 val) private pure { + require((paddr & 3) == 0, "misaligned writeUint32 address"); + uint64 palign = paddr & ~uint64(7); + + uint32 bitoffset = StateAccess.uint32ShiftLeft(uint32(paddr) & uint32(7), 3); + uint64 oldval64 = readUint64(a, palign); + uint64 newval64 = copyBits(val, 32, oldval64, bitoffset); + writeUint64(a, palign, newval64); + } + + function writeUint16(StepLog.Context memory a, uint64 paddr, uint16 val) private pure { + require((paddr & 1) == 0, "misaligned writeUint16 address"); + uint64 palign = paddr & ~uint64(7); + uint32 bitoffset = StateAccess.uint32ShiftLeft(uint32(paddr) & uint32(7), 3); + uint64 oldval64 = readUint64(a, palign); + uint64 newval64 = copyBits(val, 16, oldval64, bitoffset); + writeUint64(a, palign, newval64); + } + + function writeUint8(StepLog.Context memory a, uint64 paddr, uint8 val) private pure { + uint64 palign = paddr & ~uint64(7); + uint32 bitoffset = StateAccess.uint32ShiftLeft(uint32(paddr) & uint32(7), 3); + uint64 oldval64 = readUint64(a, palign); + uint64 newval64 = copyBits(val, 8, oldval64, bitoffset); + 
writeUint64(a, palign, newval64); + } + + // Instruction operand decoders + + function operandRd(uint32 insn) private pure returns (uint8) { + return uint8(StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 20), 27)); + } + + function operandRs1(uint32 insn) private pure returns (uint8) { + return uint8(StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 12), 27)); + } + + function operandRs2(uint32 insn) private pure returns (uint8) { + return uint8(StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 7), 27)); + } + + function operandImm12(uint32 insn) private pure returns (int32) { + return StateAccess.int32ShiftRight(int32(insn), 20); + } + + function operandImm20(uint32 insn) private pure returns (int32) { + return int32(StateAccess.uint32ShiftLeft(StateAccess.uint32ShiftRight(insn, 12), 12)); + } + + function operandJimm20(uint32 insn) private pure returns (int32) { + int32 a = int32( + StateAccess.uint32ShiftLeft(uint32(StateAccess.int32ShiftRight(int32(insn), 31)), 20) + ); + uint32 b = StateAccess.uint32ShiftLeft( + StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 1), 22), 1 + ); + uint32 c = StateAccess.uint32ShiftLeft( + StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 11), 31), 11 + ); + uint32 d = StateAccess.uint32ShiftLeft( + StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 12), 24), 12 + ); + return int32(uint32(a) | b | c | d); + } + + function operandShamt5(uint32 insn) private pure returns (int32) { + return int32(StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 7), 27)); + } + + function operandShamt6(uint32 insn) private pure returns (int32) { + return int32(StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 6), 26)); + } + + function operandSbimm12(uint32 insn) private pure returns (int32) { + int32 a = int32( + StateAccess.uint32ShiftLeft(uint32(StateAccess.int32ShiftRight(int32(insn), 31)), 12) + ); + uint32 b = StateAccess.uint32ShiftLeft( + 
StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 1), 26), 5 + ); + uint32 c = StateAccess.uint32ShiftLeft( + StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 20), 28), 1 + ); + uint32 d = StateAccess.uint32ShiftLeft( + StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 24), 31), 11 + ); + return int32(uint32(a) | b | c | d); + } + + function operandSimm12(uint32 insn) private pure returns (int32) { + return int32( + StateAccess.uint32ShiftLeft(uint32(StateAccess.int32ShiftRight(int32(insn), 25)), 5) + | StateAccess.uint32ShiftRight(StateAccess.uint32ShiftLeft(insn, 20), 27) + ); + } + + // Execute instruction + + function advancePc(StepLog.Context memory a, uint64 pc) private pure { + uint64 newPc = StateAccess.uint64AddUint64(pc, 4); + return StateAccess.writePc(a, newPc); + } + + function branch(StepLog.Context memory a, uint64 pc) private pure { + return StateAccess.writePc(a, pc); + } + + function executeLUI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + int32 imm = operandImm20(insn); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(imm)); + } + return advancePc(a, pc); + } + + function executeAUIPC(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm20(insn); + uint8 rd = operandRd(insn); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.uint64AddInt32(pc, imm)); + } + return advancePc(a, pc); + } + + function executeJAL(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandJimm20(insn); + uint8 rd = operandRd(insn); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.uint64AddUint64(pc, 4)); + } + return branch(a, StateAccess.uint64AddInt32(pc, imm)); + } + + function executeJALR(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint64 rs1val = 
StateAccess.readX(a, rs1); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.uint64AddUint64(pc, 4)); + } + return branch(a, StateAccess.uint64AddInt32(rs1val, imm) & (~uint64(1))); + } + + function executeBEQ(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSbimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + if (rs1val == rs2val) { + return branch(a, StateAccess.uint64AddInt32(pc, imm)); + } + return advancePc(a, pc); + } + + function executeBNE(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSbimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + if (rs1val != rs2val) { + return branch(a, StateAccess.uint64AddInt32(pc, imm)); + } + return advancePc(a, pc); + } + + function executeBLT(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSbimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + int64 rs1val = int64(StateAccess.readX(a, rs1)); + int64 rs2val = int64(StateAccess.readX(a, rs2)); + if (rs1val < rs2val) { + return branch(a, StateAccess.uint64AddInt32(pc, imm)); + } + return advancePc(a, pc); + } + + function executeBGE(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSbimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + int64 rs1val = int64(StateAccess.readX(a, rs1)); + int64 rs2val = int64(StateAccess.readX(a, rs2)); + if (rs1val >= rs2val) { + return branch(a, StateAccess.uint64AddInt32(pc, imm)); + } + return advancePc(a, pc); + } + + function executeBLTU(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSbimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + 
uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + if (rs1val < rs2val) { + return branch(a, StateAccess.uint64AddInt32(pc, imm)); + } + return advancePc(a, pc); + } + + function executeBGEU(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSbimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + if (rs1val >= rs2val) { + return branch(a, StateAccess.uint64AddInt32(pc, imm)); + } + return advancePc(a, pc); + } + + function executeLB(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + int8 i8 = int8(readUint8(a, StateAccess.uint64AddInt32(rs1val, imm))); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.int8ToUint64(i8)); + } + return advancePc(a, pc); + } + + function executeLHU(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint16 u16 = readUint16(a, StateAccess.uint64AddInt32(rs1val, imm)); + if (rd != 0) { + StateAccess.writeX(a, rd, u16); + } + return advancePc(a, pc); + } + + function executeLH(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + int16 i16 = int16(readUint16(a, StateAccess.uint64AddInt32(rs1val, imm))); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.int16ToUint64(i16)); + } + return advancePc(a, pc); + } + + function executeLW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = 
operandRs1(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + int32 i32 = int32(readUint32(a, StateAccess.uint64AddInt32(rs1val, imm))); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(i32)); + } + return advancePc(a, pc); + } + + function executeLBU(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint8 u8 = readUint8(a, StateAccess.uint64AddInt32(rs1val, imm)); + if (rd != 0) { + StateAccess.writeX(a, rd, u8); + } + return advancePc(a, pc); + } + + function executeSB(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + writeUint8(a, StateAccess.uint64AddInt32(rs1val, imm), uint8(rs2val)); + return advancePc(a, pc); + } + + function executeSH(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + writeUint16(a, StateAccess.uint64AddInt32(rs1val, imm), uint16(rs2val)); + return advancePc(a, pc); + } + + function executeSW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint32 rs2val = uint32(StateAccess.readX(a, rs2)); + writeUint32(a, StateAccess.uint64AddInt32(rs1val, imm), rs2val); + return advancePc(a, pc); + } + + function executeADDI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + uint64 
rs1val = StateAccess.readX(a, rs1); + int64 val = StateAccess.int64AddInt64(int64(rs1val), int64(imm)); + StateAccess.writeX(a, rd, uint64(val)); + } + return advancePc(a, pc); + } + + function executeADDIW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + int32 rs1val = StateAccess.uint64ToInt32(StateAccess.readX(a, rs1)); + if (rd != 0) { + int32 val = StateAccess.int32AddInt32(rs1val, imm); + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(val)); + } + return advancePc(a, pc); + } + + function executeSLTI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + int64 rs1val = int64(StateAccess.readX(a, rs1)); + if (rs1val < imm) { + StateAccess.writeX(a, rd, 1); + } else { + StateAccess.writeX(a, rd, 0); + } + } + return advancePc(a, pc); + } + + function executeSLTIU(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + if (rs1val < StateAccess.int32ToUint64(imm)) { + StateAccess.writeX(a, rd, 1); + } else { + StateAccess.writeX(a, rd, 0); + } + } + return advancePc(a, pc); + } + + function executeXORI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + StateAccess.writeX(a, rd, rs1val ^ StateAccess.int32ToUint64(imm)); + } + return advancePc(a, pc); + } + + function executeORI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + uint64 rs1val = 
StateAccess.readX(a, rs1); + StateAccess.writeX(a, rd, rs1val | StateAccess.int32ToUint64(imm)); + } + return advancePc(a, pc); + } + + function executeANDI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + StateAccess.writeX(a, rd, rs1val & StateAccess.int32ToUint64(imm)); + } + return advancePc(a, pc); + } + + function executeSLLI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandShamt6(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + StateAccess.writeX(a, rd, StateAccess.uint64ShiftLeft(rs1val, uint32(imm))); + } + return advancePc(a, pc); + } + + function executeSLLIW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandShamt5(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint32 rs1val = uint32(StateAccess.readX(a, rs1)); + if (rd != 0) { + StateAccess.writeX( + a, + rd, + StateAccess.int32ToUint64(int32(StateAccess.uint32ShiftLeft(rs1val, uint32(imm)))) + ); + } + return advancePc(a, pc); + } + + function executeSRLI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandShamt6(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + StateAccess.writeX(a, rd, StateAccess.uint64ShiftRight(rs1val, uint32(imm))); + } + return advancePc(a, pc); + } + + function executeSRLW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint32 rs1val = uint32(StateAccess.readX(a, rs1)); + uint32 rs2val = uint32(StateAccess.readX(a, rs2)); + int32 rdval = int32(StateAccess.uint32ShiftRight(rs1val, rs2val)); + if (rd != 
0) { + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(rdval)); + } + return advancePc(a, pc); + } + + function executeSRLIW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandShamt5(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint32 rs1val = uint32(StateAccess.readX(a, rs1)); + int32 rdval = int32(StateAccess.uint32ShiftRight(rs1val, uint32(imm))); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(rdval)); + } + return advancePc(a, pc); + } + + function executeSRAI(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandShamt6(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + StateAccess.writeX( + a, rd, uint64(StateAccess.int64ShiftRight(int64(rs1val), uint32(imm))) + ); + } + return advancePc(a, pc); + } + + function executeSRAIW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandShamt5(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + int32 rs1val = StateAccess.uint64ToInt32(StateAccess.readX(a, rs1)); + if (rd != 0) { + StateAccess.writeX( + a, rd, StateAccess.int32ToUint64(StateAccess.int32ShiftRight(rs1val, uint32(imm))) + ); + } + return advancePc(a, pc); + } + + function executeADD(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + StateAccess.writeX(a, rd, StateAccess.uint64AddUint64(rs1val, rs2val)); + } + return advancePc(a, pc); + } + + function executeADDW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + int32 rs1val = StateAccess.uint64ToInt32(StateAccess.readX(a, rs1)); + 
int32 rs2val = StateAccess.uint64ToInt32(StateAccess.readX(a, rs2)); + if (rd != 0) { + int32 val = StateAccess.int32AddInt32(rs1val, rs2val); + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(val)); + } + return advancePc(a, pc); + } + + function executeSUB(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + StateAccess.writeX(a, rd, StateAccess.uint64SubUint64(rs1val, rs2val)); + } + return advancePc(a, pc); + } + + function executeSUBW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + int32 rs1val = StateAccess.uint64ToInt32(StateAccess.readX(a, rs1)); + int32 rs2val = StateAccess.uint64ToInt32(StateAccess.readX(a, rs2)); + if (rd != 0) { + int32 val = StateAccess.int32SubInt32(rs1val, rs2val); + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(val)); + } + return advancePc(a, pc); + } + + function executeSLL(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + uint32 rs2val = uint32(StateAccess.readX(a, rs2)); + StateAccess.writeX(a, rd, StateAccess.uint64ShiftLeft(rs1val, rs2val)); + } + return advancePc(a, pc); + } + + function executeSLLW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint32 rs1val = uint32(StateAccess.readX(a, rs1)); + uint32 rs2val = uint32(StateAccess.readX(a, rs2)); + int32 rdval = int32(StateAccess.uint32ShiftLeft(rs1val, rs2val)); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(rdval)); + } + return 
advancePc(a, pc); + } + + function executeSLT(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + int64 rs1val = int64(StateAccess.readX(a, rs1)); + int64 rs2val = int64(StateAccess.readX(a, rs2)); + uint64 rdval = 0; + if (rs1val < rs2val) { + rdval = 1; + } + StateAccess.writeX(a, rd, rdval); + } + return advancePc(a, pc); + } + + function executeSLTU(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + uint64 rdval = 0; + if (rs1val < rs2val) { + rdval = 1; + } + StateAccess.writeX(a, rd, rdval); + } + return advancePc(a, pc); + } + + function executeXOR(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + StateAccess.writeX(a, rd, rs1val ^ rs2val); + } + return advancePc(a, pc); + } + + function executeSRL(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + StateAccess.writeX(a, rd, StateAccess.uint64ShiftRight(rs1val, uint32(rs2val))); + } + return advancePc(a, pc); + } + + function executeSRA(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + int64 rs1val = int64(StateAccess.readX(a, rs1)); + uint32 rs2val = uint32(StateAccess.readX(a, rs2)); + StateAccess.writeX(a, rd, 
uint64(StateAccess.int64ShiftRight(rs1val, rs2val))); + } + return advancePc(a, pc); + } + + function executeSRAW(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + int32 rs1val = StateAccess.uint64ToInt32(StateAccess.readX(a, rs1)); + uint32 rs2val = uint32(StateAccess.readX(a, rs2)); + int32 rdval = StateAccess.int32ShiftRight(rs1val, rs2val); + if (rd != 0) { + StateAccess.writeX(a, rd, StateAccess.int32ToUint64(rdval)); + } + return advancePc(a, pc); + } + + function executeOR(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + StateAccess.writeX(a, rd, rs1val | rs2val); + } + return advancePc(a, pc); + } + + function executeAND(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + if (rd != 0) { + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + StateAccess.writeX(a, rd, rs1val & rs2val); + } + return advancePc(a, pc); + } + + function executeFENCE(StepLog.Context memory a, uint32, uint64 pc) private pure { + return advancePc(a, pc); + } + + function executeLWU(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = operandRs1(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint32 u32 = readUint32(a, StateAccess.uint64AddInt32(rs1val, imm)); + if (rd != 0) { + StateAccess.writeX(a, rd, u32); + } + return advancePc(a, pc); + } + + function executeLD(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandImm12(insn); + uint8 rd = operandRd(insn); + uint8 rs1 = 
operandRs1(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 u64 = readUint64(a, StateAccess.uint64AddInt32(rs1val, imm)); + if (rd != 0) { + StateAccess.writeX(a, rd, u64); + } + return advancePc(a, pc); + } + + function executeSD(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + int32 imm = operandSimm12(insn); + uint8 rs1 = operandRs1(insn); + uint8 rs2 = operandRs2(insn); + uint64 rs1val = StateAccess.readX(a, rs1); + uint64 rs2val = StateAccess.readX(a, rs2); + writeUint64(a, StateAccess.uint64AddInt32(rs1val, imm), rs2val); + return advancePc(a, pc); + } + + function executeECALL(StepLog.Context memory a, uint32, uint64 pc) private pure { + // ECALL conventions + // a0--a7 are the same as x10--x17 + // syscall is passed in a7 + // arguments are passed in a0--a5 + // return value is in a0 (and maybe also in a1) + uint64 fn = StateAccess.readX(a, 17); // a7 contains the function number + if (fn == EmulatorConstants.UARCH_ECALL_FN_HALT) { + return StateAccess.writeHaltFlag(a, 1); + } + if (fn == EmulatorConstants.UARCH_ECALL_FN_PUTCHAR) { + uint64 c = StateAccess.readX(a, 10); // a0 contains the character to print + StateAccess.putCharECALL(a, uint8(c)); // Can be a NOOP in Solidity + return advancePc(a, pc); + } + if (fn == EmulatorConstants.UARCH_ECALL_FN_WRITE_TLB) { + uint64 set_index = StateAccess.readX(a, 10); // a0 contains TLB set (code, read, write) + uint64 slot_index = StateAccess.readX(a, 11); // a1 contains slot_index to modify + uint64 vaddr_page = StateAccess.readX(a, 12); // a2 contains vaddr_page to write + uint64 vp_offset = StateAccess.readX(a, 13); // a3 contains vp_offset to write + uint64 pma_index = StateAccess.readX(a, 14); // a4 contains index of PMA where page falls + StateAccess.writeTlbECALL(a, set_index, slot_index, vaddr_page, vp_offset, pma_index); // WARNING: This CANNOT be a NOOP in Solidity + return advancePc(a, pc); + } + StateAccess.throwRuntimeError(a, "unsupported ecall function"); + } + + 
function executeEBREAK(StepLog.Context memory a, uint32, uint64) private pure { + StateAccess.throwRuntimeError(a, "uarch aborted"); + } + + /// \brief Returns true if the opcode field of an instruction matches the provided argument + function insnMatchOpcode(uint32 insn, uint32 opcode) private pure returns (bool) { + return ((insn & 0x7f)) == opcode; + } + + /// \brief Returns true if the opcode and funct3 fields of an instruction match the provided arguments + function insnMatchOpcodeFunct3(uint32 insn, uint32 opcode, uint32 funct3) + private + pure + returns (bool) + { + uint32 mask = (7 << 12) | 0x7f; + return (insn & mask) == (StateAccess.uint32ShiftLeft(funct3, 12) | opcode); + } + + /// \brief Returns true if the opcode, funct3 and funct7 fields of an instruction match the provided arguments + function insnMatchOpcodeFunct3Funct7(uint32 insn, uint32 opcode, uint32 funct3, uint32 funct7) + private + pure + returns (bool) + { + uint32 mask = (0x7f << 25) | (7 << 12) | 0x7f; + return ((insn & mask)) + == ( + StateAccess.uint32ShiftLeft(funct7, 25) | StateAccess.uint32ShiftLeft(funct3, 12) + | opcode + ); + } + + /// \brief Returns true if the opcode, funct3 and 6 most significant bits of funct7 fields of an instruction match the + /// provided arguments + function insnMatchOpcodeFunct3Funct7Sr1( + uint32 insn, + uint32 opcode, + uint32 funct3, + uint32 funct7Sr1 + ) private pure returns (bool) { + uint32 mask = (0x3f << 26) | (7 << 12) | 0x7f; + return ((insn & mask)) + == ( + StateAccess.uint32ShiftLeft(funct7Sr1, 26) | StateAccess.uint32ShiftLeft(funct3, 12) + | opcode + ); + } + + // Decode and execute one instruction + + function executeInsn(StepLog.Context memory a, uint32 insn, uint64 pc) private pure { + if (insnMatchOpcodeFunct3(insn, 0x13, 0x0)) { + return executeADDI(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x3, 0x3)) { + return executeLD(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x63, 0x6)) { + return executeBLTU(a, insn, pc); + 
} + if (insnMatchOpcodeFunct3(insn, 0x63, 0x0)) { + return executeBEQ(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x13, 0x7)) { + return executeANDI(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x0, 0x0)) { + return executeADD(a, insn, pc); + } + if (insnMatchOpcode(insn, 0x6f)) { + return executeJAL(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7Sr1(insn, 0x13, 0x1, 0x0)) { + return executeSLLI(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x7, 0x0)) { + return executeAND(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x23, 0x3)) { + return executeSD(a, insn, pc); + } + if (insnMatchOpcode(insn, 0x37)) { + return executeLUI(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x67, 0x0)) { + return executeJALR(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x1b, 0x0)) { + return executeADDIW(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7Sr1(insn, 0x13, 0x5, 0x0)) { + return executeSRLI(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x1b, 0x5, 0x0)) { + return executeSRLIW(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x63, 0x1)) { + return executeBNE(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x3, 0x2)) { + return executeLW(a, insn, pc); + } + if (insnMatchOpcode(insn, 0x17)) { + return executeAUIPC(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x63, 0x7)) { + return executeBGEU(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x3b, 0x0, 0x0)) { + return executeADDW(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7Sr1(insn, 0x13, 0x5, 0x10)) { + return executeSRAI(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x6, 0x0)) { + return executeOR(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x1b, 0x5, 0x20)) { + return executeSRAIW(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x63, 0x5)) { + return executeBGE(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x0, 0x20)) { + return executeSUB(a, insn, pc); + } + if 
(insnMatchOpcodeFunct3(insn, 0x3, 0x4)) { + return executeLBU(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x1b, 0x1, 0x0)) { + return executeSLLIW(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x5, 0x0)) { + return executeSRL(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x4, 0x0)) { + return executeXOR(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x23, 0x2)) { + return executeSW(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x1, 0x0)) { + return executeSLL(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x63, 0x4)) { + return executeBLT(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x23, 0x0)) { + return executeSB(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x3b, 0x0, 0x20)) { + return executeSUBW(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x13, 0x4)) { + return executeXORI(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x5, 0x20)) { + return executeSRA(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x3, 0x5)) { + return executeLHU(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x23, 0x1)) { + return executeSH(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x3b, 0x5, 0x0)) { + return executeSRLW(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x3, 0x6)) { + return executeLWU(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x3b, 0x1, 0x0)) { + return executeSLLW(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x3, 0x0)) { + return executeLB(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x3, 0x0)) { + return executeSLTU(a, insn, pc); + } + if (insnMatchOpcodeFunct3Funct7(insn, 0x3b, 0x5, 0x20)) { + return executeSRAW(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x3, 0x1)) { + return executeLH(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x13, 0x6)) { + return executeORI(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x13, 0x3)) { + return executeSLTIU(a, insn, pc); + } 
+ if (insnMatchOpcodeFunct3Funct7(insn, 0x33, 0x2, 0x0)) { + return executeSLT(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0x13, 0x2)) { + return executeSLTI(a, insn, pc); + } + if (insnMatchOpcodeFunct3(insn, 0xf, 0x0)) { + return executeFENCE(a, insn, pc); + } + if (insn == uint32(0x73)) { + return executeECALL(a, insn, pc); + } + if (insn == uint32(0x100073)) { + return executeEBREAK(a, insn, pc); + } + StateAccess.throwRuntimeError(a, "illegal instruction"); + } + + function uarchStep(StepLog.Context memory a) internal pure returns (UArchStepStatus) { + // Read the cycle first so the overflow guard below runs before any state is mutated + uint64 cycle = StateAccess.readCycle(a); + // do not advance if cycle will overflow + if (cycle >= EmulatorConstants.UARCH_CYCLE_MAX) { + return UArchStepStatus.CycleOverflow; + } + // do not advance if machine is halted + if (StateAccess.readHaltFlag(a) != 0) { + return UArchStepStatus.UArchHalted; + } + // execute next instruction + uint64 pc = StateAccess.readPc(a); + uint32 insn = readUint32(a, pc); + executeInsn(a, insn, pc); + cycle = cycle + 1; + StateAccess.writeCycle(a, cycle); + return UArchStepStatus.Success; + } +} diff --git a/solidity-step/src/Verify.sol b/solidity-step/src/Verify.sol new file mode 100644 index 000000000..8d0365b9d --- /dev/null +++ b/solidity-step/src/Verify.sol @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {SendCmioResponse} from "src/SendCmioResponse.sol"; +import {StepLog} from "src/StepLog.sol"; +import {UArchReset} from "src/UArchReset.sol"; +import {UArchStep} from "src/UArchStep.sol"; + +/// Verifies binary step logs. 
Decode once, then verify: +/// +/// StepLog.Context memory ctx = StepLog.decode(log); +/// Verify.verifyStep(ctx, rootBefore, cycleCount, rootAfter); +/// +/// decode checks the pre-state root and that the log occupies the whole buffer; each +/// verifyXXX checks the caller's beliefs, runs the operation, and checks the post-state +/// root. Reverts on mismatch. MUTATES `ctx`; do not reuse a Context across calls. For +/// multi-log composition, call StepLog.decodeAt in a cursor loop instead. +library Verify { + error RootHashBeforeMismatch(bytes32 expected, bytes32 fromLog); + error RootHashAfterMismatch(bytes32 expected, bytes32 fromLog); + error UarchCycleCountMismatch(uint64 expected, uint64 fromLog); + /// Reset/cmio logs must declare requested_cycle_count = 0. + error RequestedCycleCountMustBeZero(uint64 fromLog); + /// Recomputed post-state root does not match the log header (Layer 1). + error FinalRootHashMismatch(); + + function verifyStep( + StepLog.Context memory ctx, + bytes32 rootHashBefore, + uint64 cycleCount, + bytes32 rootHashAfter + ) internal pure { + if (ctx.rootHashBefore != rootHashBefore) { + revert RootHashBeforeMismatch(rootHashBefore, ctx.rootHashBefore); + } + if (ctx.requestedCycleCount != cycleCount) { + revert UarchCycleCountMismatch(cycleCount, ctx.requestedCycleCount); + } + + for (uint64 i = 0; i < cycleCount; i++) { + UArchStep.UArchStepStatus status = UArchStep.uarchStep(ctx); + if (status != UArchStep.UArchStepStatus.Success) break; + } + + // A uarch step never ends paused on a main-machine rejected yield (and does not witness the + // shadow-registers page), so no revert substitution applies; the post-state is the tree root. 
+ if (StepLog.computeRootHash(ctx, true) != ctx.rootHashAfter) { + revert FinalRootHashMismatch(); + } + if (ctx.rootHashAfter != rootHashAfter) { + revert RootHashAfterMismatch(rootHashAfter, ctx.rootHashAfter); + } + } + + function verifyReset(StepLog.Context memory ctx, bytes32 rootHashBefore, bytes32 rootHashAfter) + internal + pure + { + if (ctx.rootHashBefore != rootHashBefore) { + revert RootHashBeforeMismatch(rootHashBefore, ctx.rootHashBefore); + } + if (ctx.requestedCycleCount != 0) { + revert RequestedCycleCountMustBeZero(ctx.requestedCycleCount); + } + + UArchReset.uarchResetState(ctx); + + // When the reset reverted the state on a rejected input, the canonical post-state hash is the + // recorded revert root hash (carried on the context by revertState), not the recomputed tree root. + bytes32 finalRootHash; + if (ctx.reverted) { + // Revert substitutes the recorded root instead of recomputing it; still assert no node was + // left unconsumed (computeRootHash makes this assertion on the non-reverted path). + StepLog.checkAllNodesConsumed(ctx); + finalRootHash = ctx.revertedRootHash; + } else { + finalRootHash = StepLog.computeRootHash(ctx, true); + } + if (finalRootHash != ctx.rootHashAfter) { + revert FinalRootHashMismatch(); + } + if (ctx.rootHashAfter != rootHashAfter) { + revert RootHashAfterMismatch(rootHashAfter, ctx.rootHashAfter); + } + } + + function verifySendCmioResponse( + StepLog.Context memory ctx, + bytes32 rootHashBefore, + uint16 reason, + bytes calldata data, + bytes32 revertRootHash, + bytes32 rootHashAfter + ) internal pure { + if (ctx.rootHashBefore != rootHashBefore) { + revert RootHashBeforeMismatch(rootHashBefore, ctx.rootHashBefore); + } + if (ctx.requestedCycleCount != 0) { + revert RequestedCycleCountMustBeZero(ctx.requestedCycleCount); + } + + SendCmioResponse.sendCmioResponse(ctx, revertRootHash, reason, data, uint32(data.length)); + + // send_cmio_response is not a step. 
Even when it no-ops on a machine paused on a rejected + // input, its transition is the identity, so the post-state is the recomputed tree root with + // no revert substitution. + if (StepLog.computeRootHash(ctx, true) != ctx.rootHashAfter) { + revert FinalRootHashMismatch(); + } + if (ctx.rootHashAfter != rootHashAfter) { + revert RootHashAfterMismatch(rootHashAfter, ctx.rootHashAfter); + } + } +} diff --git a/solidity-step/test/HashTree.t.sol b/solidity-step/test/HashTree.t.sol new file mode 100644 index 000000000..2356cb6a9 --- /dev/null +++ b/solidity-step/test/HashTree.t.sol @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {Test} from "forge-std/Test.sol"; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; +import {HashTree} from "src/HashTree.sol"; + +/// Cross-checks src/HashTree.sol's two primitives against independent oracles: +/// merkleTreeHash vs an iterative bottom-up reference; merkleTreeHashPadded vs +/// brute-force "materialise the pad, then merkleTreeHash". +contract HashTreeTest is Test { + // Literals: Solidity can't use cross-library constants in fixed-array sizes. + uint256 constant PAGE_LEAF_COUNT = 128; + uint256 constant SIBLING_SIZE = 32; + + /// Iterative bottom-up reference oracle: halves the level array in place. + /// Independent of production's recursive merkleTreeHash. 
+    function hashIter(bytes memory pageData, uint256 startOffset) internal pure returns (bytes32) {
+        bytes32[PAGE_LEAF_COUNT] memory nodes;
+
+        // Leaf level: each 32-byte word of the page, counted from startOffset, is hashed alone.
+        for (uint256 leaf = 0; leaf < PAGE_LEAF_COUNT; ++leaf) {
+            bytes32 leafWord;
+            assembly {
+                // The extra 32 skips the length prefix of the bytes array.
+                leafWord := mload(add(pageData, add(32, add(startOffset, mul(leaf, SIBLING_SIZE)))))
+            }
+            nodes[leaf] = keccak256(abi.encodePacked(leafWord));
+        }
+
+        // Inner levels: fold adjacent pairs into the front of the array, halving the width on
+        // every pass. Slot k is written only after slots 2k and 2k+1 have been read, so the
+        // in-place update never clobbers an input that is still pending.
+        for (uint256 width = PAGE_LEAF_COUNT >> 1; width > 0; width >>= 1) {
+            for (uint256 k = 0; k < width; ++k) {
+                nodes[k] = keccak256(abi.encodePacked(nodes[2 * k], nodes[2 * k + 1]));
+            }
+        }
+        return nodes[0];
+    }
+
+    /// Checks HashTree.merkleTreeHash against the iterative oracle on two pages: one that is
+    /// all zeros and one filled with the cycling pattern produced by filler.
+    function testRecursiveMatchesIterative() public pure {
+        uint256 pageSize = EmulatorConstants.PAGE_SIZE;
+        bytes memory pristine = new bytes(pageSize);
+        bytes memory patterned = filler(pageSize);
+
+        for (uint256 which = 0; which < 2; ++which) {
+            bytes memory page;
+            if (which == 0) {
+                page = pristine;
+            } else {
+                page = patterned;
+            }
+            assertEq(
+                HashTree.merkleTreeHash(page, 0, pageSize),
+                hashIter(page, 0),
+                "production recursive disagrees with iterative oracle"
+            );
+        }
+    }
+
+    /// Brute-force oracle: copies `data` to the front of a zeroed buffer of 2^totalLog2Size
+    /// bytes and hashes the whole buffer with merkleTreeHash. Reverts when `data` does not fit.
+    function padAndHash(bytes memory data, uint8 totalLog2Size) internal pure returns (bytes32) {
+        uint256 dataLen = data.length;
+        uint256 paddedLen = uint256(1) << totalLog2Size;
+        require(paddedLen >= dataLen, "totalSize < data.length");
+
+        // new bytes() is zero-filled, so only the data prefix has to be copied in.
+        bytes memory buffer = new bytes(paddedLen);
+        assembly {
+            mcopy(add(buffer, 32), add(data, 32), dataLen)
+        }
+        return HashTree.merkleTreeHash(buffer, 0, paddedLen);
+    }
+
+    /// Hand-picked sizes that exercise each branch of merkleTreeHashPadded's
+    /// recursion: entirely-pristine, entirely-within, leaf-overlap, mixed
+    /// supra-page, full (no padding).
+    function testFixedBoundaries() public view {
+        // 8 KB region (log2Size 13), the smallest supra-page case. In order: empty, one byte,
+        // exactly one leaf, leaf-overlap, exactly one page, mixed supra-page, one byte short
+        // of full, and full (no padding).
+        uint256[8] memory lengths8k = [uint256(0), 1, 32, 33, 4096, 4097, (1 << 13) - 1, 1 << 13];
+        for (uint256 k = 0; k < lengths8k.length; ++k) {
+            check(filler(lengths8k[k]), 13);
+        }
+
+        // 128 KB region (log2Size 17), which matches our largest cmio fixture write size.
+        uint256[5] memory lengths128k = [uint256(0), 1, 65536, (1 << 17) - 1, 1 << 17];
+        for (uint256 k = 0; k < lengths128k.length; ++k) {
+            check(filler(lengths128k[k]), 17);
+        }
+    }
+
+    /// Fuzzes the padded hash against the brute-force oracle with arbitrary data and a
+    /// log2Size clamped to [HASH_TREE_LOG2_WORD_SIZE, 17]; the cap of 17 bounds per-iteration
+    /// memory while still spanning the leaf-overlap regime.
+    function testFuzz(bytes memory data, uint8 log2Size) public view {
+        uint256 clamped = bound(uint256(log2Size), EmulatorConstants.HASH_TREE_LOG2_WORD_SIZE, 17);
+        uint256 capacity = uint256(1) << clamped;
+        if (capacity < data.length) {
+            // Shorten the input in place instead of discarding it, so no fuzz run is wasted.
+            assembly {
+                mstore(data, capacity)
+            }
+        }
+        check(data, uint8(clamped));
+    }
+
+    /// A log2Size outside [WORD, ROOT) must revert with PaddedMerkleHashLog2SizeOutOfRange.
+    function testRejectsBadPaddedLog2() public {
+        bytes4 outOfRange = HashTree.PaddedMerkleHashLog2SizeOutOfRange.selector;
+
+        // One below the word size: too small.
+        vm.expectPartialRevert(outOfRange);
+        this.calldataWrap("", EmulatorConstants.HASH_TREE_LOG2_WORD_SIZE - 1);
+
+        // Exactly the root size: too large, since the upper bound is exclusive.
+        vm.expectPartialRevert(outOfRange);
+        this.calldataWrap("", EmulatorConstants.HASH_TREE_LOG2_ROOT_SIZE);
+    }
+
+    /// Data longer than the padded region is rejected.
+ function testRejectsOversizedData() public { + vm.expectPartialRevert(HashTree.DataExceedsPaddedSize.selector); + this.calldataWrap(filler((1 << 13) + 1), 13); + } + + function check(bytes memory data, uint8 log2Size) internal view { + bytes32 oracle = padAndHash(data, log2Size); + bytes32 fast = this.calldataWrap(data, log2Size); + assertEq(fast, oracle, "merkleTreeHashPadded disagrees with brute-force oracle"); + } + + /// Calldata bridge: merkleTreeHashPadded takes `bytes calldata`. + function calldataWrap(bytes calldata data, uint8 log2Size) external pure returns (bytes32) { + return HashTree.merkleTreeHashPadded(data, log2Size); + } + + /// a..z cycling, so a byte error surfaces as a hash mismatch rather than a same-byte hash. + function filler(uint256 n) internal pure returns (bytes memory b) { + b = new bytes(n); + for (uint256 i = 0; i < n; i++) { + b[i] = bytes1(uint8(0x61 + (i % 26))); + } + } +} diff --git a/solidity-step/test/ManifestParser.sol b/solidity-step/test/ManifestParser.sol new file mode 100644 index 000000000..a8888a4fa --- /dev/null +++ b/solidity-step/test/ManifestParser.sol @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {Test} from "forge-std/Test.sol"; + +/// Shared base for tests that walk a `_manifest.csv` produced by +/// tests/lua/uarch-riscv-tests.lua. Unknown kinds map to `Kind.Unknown` so +/// readers stay forward-compatible with fixture kinds added later. +abstract contract ManifestParser is Test { + enum Kind { + Unknown, + Program, + Cycle, + ResetUarch, + SendCmioResponse + } + + // The cmio `data` column is plain ASCII (recorder-controlled, CSV-safe). + struct Row { + Kind kind; + string name; + string hashFunction; + uint64 requestedCycleCount; + bytes32 rootHashBefore; + bytes32 rootHashAfter; + uint16 reason; + uint32 dataLength; + bytes data; + // Value written to the revert-root-hash shadow slot (cmio + reset rows); zero otherwise. 
+ bytes32 revertRootHash; + // Names the rejection a corrupt fixture must trigger (reject fixtures); blank means + // the log must replay successfully. + string expectError; + } + + // Canonical uarch fixture locations, shared by the single-step uarch tests. + string constant UARCH_TESTS_MANIFEST = "test/fixtures/uarch-tests/_manifest.csv"; + string constant UARCH_PER_CYCLE_DIR = "test/fixtures/uarch-tests-per-cycle"; + + /// Returns the rows of `path` matching `kind`; others are skipped. + /// Two-pass: count, then allocate exactly and fill. + function readManifestRows(string memory path, Kind kind) internal returns (Row[] memory) { + vm.readLine(path); // header + uint256 n = 0; + while (true) { + string memory line = vm.readLine(path); + if (bytes(line).length == 0) break; + if (parseRow(line).kind == kind) n++; + } + vm.closeFile(path); + + Row[] memory out = new Row[](n); + vm.readLine(path); // header + uint256 i = 0; + while (i < n) { + string memory line = vm.readLine(path); + if (bytes(line).length == 0) break; + Row memory r = parseRow(line); + if (r.kind == kind) out[i++] = r; + } + vm.closeFile(path); + return out; + } + + /// First row of `path` matching `kind`; reverts if there are none. + function firstRow(string memory path, Kind kind) internal returns (Row memory) { + Row[] memory rows = readManifestRows(path, kind); + require(rows.length > 0, "no manifest row for kind"); + return rows[0]; + } + + /// A representative valid step log for happy-path and tampering tests, with its manifest + /// row (before/after roots, cycle count). Any recorded cycle would serve equally; this + /// returns the first program's first cycle. Callers must not depend on which one. 
+ function sampleStepLog() internal returns (bytes memory log, Row memory row) { + Row memory program = firstRow(UARCH_TESTS_MANIFEST, Kind.Program); + string memory dir = vm.split(program.name, ".log")[0]; + row = firstRow(string.concat(UARCH_PER_CYCLE_DIR, "/", dir, "/_manifest.csv"), Kind.Cycle); + log = vm.readFileBinary(string.concat(UARCH_PER_CYCLE_DIR, "/", dir, "/", row.name)); + } + + function parseRow(string memory line) internal pure returns (Row memory r) { + string[] memory cols = vm.split(line, ","); + require(cols.length == 11, "manifest row must have 11 columns"); + r.kind = parseKind(cols[0]); + r.name = cols[1]; + r.expectError = cols[2]; + r.hashFunction = cols[3]; + r.requestedCycleCount = uint64(vm.parseUint(cols[4])); + r.rootHashBefore = vm.parseBytes32(cols[5]); + r.rootHashAfter = vm.parseBytes32(cols[6]); + if (bytes(cols[7]).length > 0) r.reason = uint16(vm.parseUint(cols[7])); + if (bytes(cols[8]).length > 0) r.dataLength = uint32(vm.parseUint(cols[8])); + r.data = bytes(cols[9]); + if (bytes(cols[10]).length > 0) r.revertRootHash = vm.parseBytes32(cols[10]); + } + + function parseKind(string memory s) private pure returns (Kind) { + bytes32 h = keccak256(bytes(s)); + if (h == keccak256(bytes("program"))) return Kind.Program; + if (h == keccak256(bytes("cycle"))) return Kind.Cycle; + if (h == keccak256(bytes("reset_uarch"))) return Kind.ResetUarch; + if (h == keccak256(bytes("send_cmio_response"))) return Kind.SendCmioResponse; + return Kind.Unknown; + } +} diff --git a/solidity-step/test/RejectFixtures.t.sol b/solidity-step/test/RejectFixtures.t.sol new file mode 100644 index 000000000..4f26724c3 --- /dev/null +++ b/solidity-step/test/RejectFixtures.t.sol @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {StateAccess} from "src/StateAccess.sol"; +import {StepLog} from "src/StepLog.sol"; +import {Verify} from "src/Verify.sol"; + +import {ManifestParser} from "./ManifestParser.sol"; + +/// 
Replays the shared reject fixtures and asserts the Solidity verifier rejects every entry +/// with the error its `expectError` tag names. The same logs run through the C++ host, so a +/// tag pins identical rejection across implementations. Generation and the tag vocabulary +/// live with the recorders: tests/lua/record-adversarial-uarch.lua (step + reset) and +/// record-adversarial-send-cmio-response.lua. One cmio rejection (a response whose length implies a +/// write size the log's node was not recorded for) is about the data argument rather than the log +/// bytes, so it can't be a fixture row and is checked inline. +contract RejectFixturesTest is ManifestParser { + string constant UARCH_DIR = "test/fixtures/reject-uarch"; + string constant UARCH_MANIFEST = "test/fixtures/reject-uarch/_manifest.csv"; + string constant REJECT_SEND_CMIO_RESPONSE_DIR = "test/fixtures/reject-send-cmio-response"; + string constant REJECT_SEND_CMIO_RESPONSE_MANIFEST = + "test/fixtures/reject-send-cmio-response/_manifest.csv"; + string constant SEND_CMIO_RESPONSE_DIR = "test/fixtures/send-cmio-response"; + string constant SEND_CMIO_RESPONSE_MANIFEST = "test/fixtures/send-cmio-response/_manifest.csv"; + + function testRejectsUarchSteps() public { + replayUarch(Kind.Cycle); + } + + function testRejectsUarchResets() public { + replayUarch(Kind.ResetUarch); + } + + function testRejectsCmio() public { + Row[] memory rows = + readManifestRows(REJECT_SEND_CMIO_RESPONSE_MANIFEST, Kind.SendCmioResponse); + require(rows.length > 0, "no cmio reject rows"); + for (uint256 i = 0; i < rows.length; i++) { + bytes memory log = + vm.readFileBinary(string.concat(REJECT_SEND_CMIO_RESPONSE_DIR, "/", rows[i].name)); + armExpected(rows[i].expectError); + this.verifyCmio( + log, + rows[i].rootHashBefore, + rows[i].reason, + rows[i].data, + rows[i].revertRootHash, + rows[i].rootHashAfter + ); + } + } + + /// A response whose length implies a different write size than the log's node was + /// recorded for 
must be rejected. Like the oversized case, the mismatch is in the data + /// argument (independent of the log's tree), so it is built inline against a supra-page + /// fixture rather than carried as a row. + function testRejectsCmioNodeWrongSize() public { + Row[] memory rows = readManifestRows(SEND_CMIO_RESPONSE_MANIFEST, Kind.SendCmioResponse); + Row memory big; + bool found; + for (uint256 i = 0; i < rows.length; i++) { + if (rows[i].dataLength >= 65536) { + big = rows[i]; + found = true; + break; + } + } + require(found, "need a >=64 KB cmio fixture"); + bytes memory log = vm.readFileBinary(string.concat(SEND_CMIO_RESPONSE_DIR, "/", big.name)); + // The node is sized for 64 KB; claim a smaller supra-page write (4097 B -> log2 13). + bytes memory data = new bytes(4097); + vm.expectPartialRevert(StateAccess.WriteMemoryNodeWrongSize.selector); + this.verifyCmio( + log, big.rootHashBefore, big.reason, data, big.revertRootHash, big.rootHashAfter + ); + } + + function replayUarch(Kind kind) internal { + Row[] memory rows = readManifestRows(UARCH_MANIFEST, kind); + require(rows.length > 0, "no uarch reject rows for kind"); + for (uint256 i = 0; i < rows.length; i++) { + bytes memory log = vm.readFileBinary(string.concat(UARCH_DIR, "/", rows[i].name)); + armExpected(rows[i].expectError); + if (kind == Kind.Cycle) { + this.verifyCycle( + log, rows[i].rootHashBefore, rows[i].requestedCycleCount, rows[i].rootHashAfter + ); + } else { + this.verifyReset(log, rows[i].rootHashBefore, rows[i].rootHashAfter); + } + } + } + + /// Map the reject tag to the Solidity error it must revert with. Custom errors match by + /// selector (arguments vary); the interpreter traps share one RuntimeError selector, so + /// they match the exact message that distinguishes them. 
+ function armExpected(string memory tag) internal { + bytes32 t = keccak256(bytes(tag)); + if (t == keccak256("illegal_instruction")) { + expectRuntime("illegal instruction"); + } else if (t == keccak256("uarch_aborted")) { + expectRuntime("uarch aborted"); + } else if (t == keccak256("unsupported_ecall")) { + expectRuntime("unsupported ecall function"); + } else { + vm.expectPartialRevert(selectorFor(t)); + } + } + + function expectRuntime(string memory message) internal { + vm.expectRevert(abi.encodeWithSelector(StateAccess.RuntimeError.selector, message)); + } + + function selectorFor(bytes32 t) internal pure returns (bytes4) { + if (t == keccak256("bad_signature")) return StepLog.InvalidSignature.selector; + if (t == keccak256("unsupported_hash_function")) { + return StepLog.UnsupportedHashFunction.selector; + } + if (t == keccak256("node_misaligned")) return StepLog.NodeNotAligned.selector; + if (t == keccak256("node_log2_out_of_range")) { + return StepLog.NodeLog2SizeOutOfRange.selector; + } + if (t == keccak256("nonzero_scratch_hash")) return StepLog.NonZeroScratchHash.selector; + if (t == keccak256("page_count_zero")) return StepLog.PageCountZero.selector; + if (t == keccak256("page_index_not_ascending")) return StepLog.PagesNotInOrder.selector; + if (t == keccak256("entries_overlap")) return StepLog.OverlappingEntries.selector; + if (t == keccak256("unconsumed_node")) return StepLog.UnconsumedNodes.selector; + if (t == keccak256("initial_root_mismatch")) { + return StepLog.InitialRootHashMismatch.selector; + } + if (t == keccak256("nonzero_cycle_count")) { + return Verify.RequestedCycleCountMustBeZero.selector; + } + if (t == keccak256("root_before_mismatch")) return Verify.RootHashBeforeMismatch.selector; + if (t == keccak256("root_after_mismatch")) return Verify.RootHashAfterMismatch.selector; + if (t == keccak256("cycle_count_mismatch")) return Verify.UarchCycleCountMismatch.selector; + if (t == keccak256("final_root_mismatch")) return 
Verify.FinalRootHashMismatch.selector; + if (t == keccak256("reset_node_wrong_posthash")) { + return StateAccess.ResetUarchWrongPostHash.selector; + } + if (t == keccak256("cmio_node_hash_mismatch")) { + return StateAccess.WriteMemoryHashMismatch.selector; + } + revert("unmapped reject tag"); + } + + // External wrappers so `log`/`data` arrive as calldata for StepLog.decode. + + function verifyCycle(bytes calldata log, bytes32 rb, uint64 cc, bytes32 ra) external pure { + StepLog.Context memory ctx = StepLog.decode(log); + Verify.verifyStep(ctx, rb, cc, ra); + } + + function verifyReset(bytes calldata log, bytes32 rb, bytes32 ra) external pure { + StepLog.Context memory ctx = StepLog.decode(log); + Verify.verifyReset(ctx, rb, ra); + } + + function verifyCmio( + bytes calldata log, + bytes32 rb, + uint16 reason, + bytes calldata data, + bytes32 revertRootHash, + bytes32 ra + ) external pure { + StepLog.Context memory ctx = StepLog.decode(log); + Verify.verifySendCmioResponse(ctx, rb, reason, data, revertRootHash, ra); + } +} diff --git a/solidity-step/test/RejectsMutatedLog.t.sol b/solidity-step/test/RejectsMutatedLog.t.sol new file mode 100644 index 000000000..8d4331d64 --- /dev/null +++ b/solidity-step/test/RejectsMutatedLog.t.sol @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {StepLog} from "src/StepLog.sol"; +import {Verify} from "src/Verify.sol"; + +import {ManifestParser} from "./ManifestParser.sol"; + +/// The wire format is tightly packed (no padding, trailing bytes and nonzero scratch rejected), so +/// every byte is bound into the root, the post-state check, or a count: flipping any one must revert. 
+contract RejectsMutatedLogTest is ManifestParser { + bytes fixture; + bytes32 rootBefore; + uint64 cycleCount; + bytes32 rootAfter; + + function setUp() public { + Row memory row; + (fixture, row) = sampleStepLog(); + rootBefore = row.rootHashBefore; + cycleCount = row.requestedCycleCount; + rootAfter = row.rootHashAfter; + } + + /// Flip one byte (xor a non-zero mask) at an arbitrary offset and require rejection. + /// forge-config: default.fuzz.runs = 2048 + function testFuzzSingleByteFlipRejected(uint256 index, uint8 mask) public { + vm.assume(mask != 0); + index = bound(index, 0, fixture.length - 1); + bytes memory bad = bytes(fixture); + bad[index] = bytes1(uint8(bad[index]) ^ mask); + vm.expectRevert(); + this.verifyStep(bad, rootBefore, cycleCount, rootAfter); + } + + /// External wrapper so `log` arrives as calldata for StepLog.decode. + function verifyStep(bytes calldata log, bytes32 rb, uint64 cc, bytes32 ra) external pure { + StepLog.Context memory ctx = StepLog.decode(log); + Verify.verifyStep(ctx, rb, cc, ra); + } +} + +/// Same soundness invariant for the uarch-reset proof. The reset accesses the revert root hash and +/// htif.tohost, recording their shadow page into the log; this fuzz proves every byte of that +/// page -- and of the node and header -- is bound, so no single-byte flip can yield an accepted +/// reset. Fuzzed over the real reset fixture. +contract RejectsMutatedResetLogTest is ManifestParser { + string constant RESET_DIR = "test/fixtures/reset-uarch"; + string constant MANIFEST_CSV = "test/fixtures/reset-uarch/_manifest.csv"; + + bytes fixture; + bytes32 rootBefore; + bytes32 rootAfter; + + function setUp() public { + Row memory row = firstRow(MANIFEST_CSV, Kind.ResetUarch); + fixture = vm.readFileBinary(string.concat(RESET_DIR, "/", row.name)); + rootBefore = row.rootHashBefore; + rootAfter = row.rootHashAfter; + } + + /// Flip one byte (xor a non-zero mask) at an arbitrary offset and require rejection. 
+ /// forge-config: default.fuzz.runs = 2048 + function testFuzzSingleByteFlipRejected(uint256 index, uint8 mask) public { + vm.assume(mask != 0); + index = bound(index, 0, fixture.length - 1); + bytes memory bad = bytes(fixture); + bad[index] = bytes1(uint8(bad[index]) ^ mask); + vm.expectRevert(); + this.verifyReset(bad, rootBefore, rootAfter); + } + + /// External wrapper so `log` arrives as calldata for StepLog.decode. + function verifyReset(bytes calldata log, bytes32 rb, bytes32 ra) external pure { + StepLog.Context memory ctx = StepLog.decode(log); + Verify.verifyReset(ctx, rb, ra); + } +} diff --git a/solidity-step/test/RejectsUnconsumedNode.t.sol b/solidity-step/test/RejectsUnconsumedNode.t.sol new file mode 100644 index 000000000..8c7305eb5 --- /dev/null +++ b/solidity-step/test/RejectsUnconsumedNode.t.sol @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {Test} from "forge-std/Test.sol"; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; +import {StepLog} from "src/StepLog.sol"; +import {Verify} from "src/Verify.sol"; + +/// A zero-page log with a single root-sized node could fold an arbitrary post-state root directly. +/// Two layered defenses reject it: the page-count guard in decode and the unconsumed-node check. +contract RejectsUnconsumedNodeTest is Test { + /// Build a 192-byte step log: 112-byte header + one root-sized node, no pages, + /// no siblings. The single node covers the entire address space (addr 0, + /// log2Size 64), so it alone determines both the pre- and post-state roots. 
+ function buildSingleRootNodeLog(bytes32 rootBefore, bytes32 rootAfter) + internal + pure + returns (bytes memory) + { + return abi.encodePacked( + EmulatorConstants.STEP_LOG_SIGNATURE, // signature (8) + rootBefore, // root_hash_before (32) + le64(0), // requested_cycle_count (8) + rootAfter, // root_hash_after (32) + le64(uint64(EmulatorConstants.HASH_FUNCTION_KECCAK256)), // hash_function (8) + le64(0), // page_count (8) + le64(1), // node_count (8) + le64(0), // sibling_count (8) + // node entry (80): + le64(0), // addr + le64(uint64(EmulatorConstants.HASH_TREE_LOG2_ROOT_SIZE)), // log2_size = 64 + rootBefore, // hash_before -> makes the pre-state root match + rootAfter // hash_after -> attacker-chosen post-state root + ); + } + + /// A zero-cycle "step" must be a no-op: post root == pre root. A single root-sized node + /// could carry an arbitrary, unrelated post root, but such a log witnesses no pages, so + /// decode rejects it at the page-count guard before the unconsumed-node check is reached. 
+ function testForgedZeroCycleTransitionIsRejected() public { + bytes32 rootBefore = keccak256("arbitrary pre-state"); + bytes32 rootAfter = keccak256("forged unrelated post-state"); + assertTrue(rootBefore != rootAfter, "roots must differ to prove the forgery"); + + bytes memory log = buildSingleRootNodeLog(rootBefore, rootAfter); + vm.expectRevert(StepLog.PageCountZero.selector); + this.verifyStep(log, rootBefore, 0, rootAfter); + } + + function verifyStep(bytes calldata log, bytes32 rb, uint64 cc, bytes32 ra) external pure { + StepLog.Context memory ctx = StepLog.decode(log); + Verify.verifyStep(ctx, rb, cc, ra); + } + + function le64(uint64 v) internal pure returns (bytes memory out) { + out = new bytes(8); + for (uint256 i = 0; i < 8; i++) { + out[i] = bytes1(uint8(v >> (8 * i))); + } + } +} diff --git a/solidity-step/test/StepLogDecode.t.sol b/solidity-step/test/StepLogDecode.t.sol new file mode 100644 index 000000000..7fdd754e5 --- /dev/null +++ b/solidity-step/test/StepLogDecode.t.sol @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; +import {StepLog} from "src/StepLog.sol"; + +import {ManifestParser} from "./ManifestParser.sol"; + +/// Decode + structural validation of a real step-log fixture, plus corruption-rejection cases. +contract StepLogDecodeTest is ManifestParser { + // shadow uarch state at 0x400000 = page 1024; the first cycle of any uarch + // program touches it, so it is the log's first page. 
+ uint64 constant EXPECTED_FIRST_PAGE_INDEX = 1024; + + bytes fixture; + uint64 expectedCycleCount; + bytes32 expectedRootBefore; + bytes32 expectedRootAfter; + + function setUp() public { + Row memory row; + (fixture, row) = sampleStepLog(); + expectedCycleCount = row.requestedCycleCount; + expectedRootBefore = row.rootHashBefore; + expectedRootAfter = row.rootHashAfter; + } + + /// The sibling cap must cover the page cap's worst case: a maximally address-spread log can + /// need one sibling per tree level per page, so MAX_SIBLING_COUNT >= depth * MAX_PAGE_COUNT. + function testSiblingCapCoversPageCap() public pure { + uint256 depth = + EmulatorConstants.HASH_TREE_LOG2_ROOT_SIZE - EmulatorConstants.HASH_TREE_LOG2_PAGE_SIZE; + assertGe(uint256(StepLog.MAX_SIBLING_COUNT), depth * uint256(StepLog.MAX_PAGE_COUNT)); + } + + /// decode parses the header and exposes a self-consistent context. + function testDecodeHappy() public view { + StepLog.Context memory ctx = this.decode(fixture); + + assertEq( + ctx.requestedCycleCount, expectedCycleCount, "requestedCycleCount matches manifest" + ); + assertEq(ctx.hashFunction, 0, "hashFunction keccak256"); + assertEq(ctx.rootHashBefore, expectedRootBefore, "rootHashBefore matches manifest"); + assertEq(ctx.rootHashAfter, expectedRootAfter, "rootHashAfter matches manifest"); + + uint256 pageCount = ctx.pageIndices.length; + assertGt(pageCount, 0, "log has pages"); + assertEq(ctx.pageData.length, pageCount * 4096, "pageData length tracks pageCount"); + assertEq(ctx.pageHashes.length, pageCount, "pageHashes allocated per page"); + assertEq(ctx.pageIndices[0], EXPECTED_FIRST_PAGE_INDEX, "uarch shadow state page first"); + + assertTrue(ctx.rootHashBefore != ctx.rootHashAfter, "step changed the root"); + } + + /// decodeAt returns the offset just past the decoded log. 
+ function testDecodeAtAdvancesCursor() public view { + (, uint256 newOff) = this.decodeAt(fixture, 0); + assertEq(newOff, fixture.length, "cursor advanced past log"); + } + + function testRejectsShortLog() public { + bytes memory truncated = new bytes(50); + vm.expectRevert(StepLog.HeaderTruncated.selector); + this.decodeAt(truncated, 0); + } + + /// A hostile offset must revert cleanly (HeaderTruncated), not panic on + /// offset + HEADER_SIZE overflow. + function testRejectsOverflowingOffset() public { + vm.expectRevert(StepLog.HeaderTruncated.selector); + this.decodeAt(fixture, type(uint256).max); + } + + function testRejectsBadSignature() public { + bytes memory bad = bytes(fixture); + bad[0] = 0x00; + vm.expectRevert(StepLog.InvalidSignature.selector); + this.decodeAt(bad, 0); + } + + function testRejectsTruncatedAfterHeader() public { + // Header says it has pages but we only provide the header. + bytes memory bad = new bytes(StepLog.HEADER_SIZE); + for (uint256 i = 0; i < StepLog.HEADER_SIZE; i++) { + bad[i] = fixture[i]; + } + vm.expectRevert(StepLog.LogTruncated.selector); + this.decodeAt(bad, 0); + } + + /// Decoding verifies pre-state Merkle integrity, so a tampered page byte must reject. + function testRejectsTamperedPage() public { + bytes memory bad = bytes(fixture); + // First page data starts after the header + the 8-byte page index. + uint256 firstPageDataOff = StepLog.HEADER_SIZE + 8; + bad[firstPageDataOff] = bytes1(uint8(bad[firstPageDataOff]) ^ 0xff); + vm.expectRevert(StepLog.InitialRootHashMismatch.selector); + this.decodeAt(bad, 0); + } + + /// A valid log with junk appended is tolerated by decodeAt (cursor primitive) but + /// rejected by decode (single-log entrypoint). + function testDecodeRejectsTrailingBytes() public { + bytes memory padded = bytes.concat(fixture, hex"deadbeef"); + // decodeAt stops at the end of the log body, leaving the cursor before the buffer end. 
+ (, uint256 newOff) = this.decodeAt(padded, 0); + assertEq(newOff, fixture.length, "decodeAt stops at end of log body"); + vm.expectRevert( + abi.encodeWithSelector(StepLog.TrailingBytes.selector, fixture.length, padded.length) + ); + this.decode(padded); + } + + // The three counts occupy the last 24 bytes of the header (page, node, sibling). + function testRejectsPageCountOverCap() public { + bytes memory bad = bytes(fixture); + uint64 over = StepLog.MAX_PAGE_COUNT + 1; + setLE64(bad, StepLog.HEADER_SIZE - 24, over); + vm.expectRevert(abi.encodeWithSelector(StepLog.PageCountExceedsLimit.selector, over)); + this.decodeAt(bad, 0); + } + + function testRejectsNodeCountOverCap() public { + bytes memory bad = bytes(fixture); + uint64 over = StepLog.MAX_NODE_COUNT + 1; + setLE64(bad, StepLog.HEADER_SIZE - 16, over); + vm.expectRevert(abi.encodeWithSelector(StepLog.NodeCountExceedsLimit.selector, over)); + this.decodeAt(bad, 0); + } + + function testRejectsSiblingCountOverCap() public { + bytes memory bad = bytes(fixture); + uint64 over = StepLog.MAX_SIBLING_COUNT + 1; + setLE64(bad, StepLog.HEADER_SIZE - 8, over); + vm.expectRevert(abi.encodeWithSelector(StepLog.SiblingCountExceedsLimit.selector, over)); + this.decodeAt(bad, 0); + } + + function setLE64(bytes memory b, uint256 off, uint64 v) internal pure { + for (uint256 i = 0; i < 8; i++) { + b[off + i] = bytes1(uint8(v >> (8 * i))); + } + } + + function decode(bytes calldata data) external pure returns (StepLog.Context memory ctx) { + return StepLog.decode(data); + } + + function decodeAt(bytes calldata data, uint256 offset) + external + pure + returns (StepLog.Context memory ctx, uint256 newOff) + { + return StepLog.decodeAt(data, offset); + } +} diff --git a/solidity-step/test/UarchFixedPoint.t.sol b/solidity-step/test/UarchFixedPoint.t.sol new file mode 100644 index 000000000..8d817b589 --- /dev/null +++ b/solidity-step/test/UarchFixedPoint.t.sol @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma 
solidity ^0.8.30; + +import {EmulatorConstants} from "src/EmulatorConstants.sol"; +import {StateAccess} from "src/StateAccess.sol"; +import {StepLog} from "src/StepLog.sol"; +import {UArchStep} from "src/UArchStep.sol"; + +import {ManifestParser} from "./ManifestParser.sol"; + +/// A uarch step at a fixed point executes nothing and reports a status instead of advancing: +/// CycleOverflow at the cycle ceiling, UArchHalted when halted. (The trapping instructions -- +/// illegal / ebreak / unsupported ecall -- are exercised by the reject fixtures.) +contract UarchFixedPointTest is ManifestParser { + bytes stepLog; + + function setUp() public { + (stepLog,) = sampleStepLog(); + } + + function testCycleOverflowIsFixedPoint() public view { + assertEq( + this.stepStatus(stepLog, true, false), uint8(UArchStep.UArchStepStatus.CycleOverflow) + ); + } + + function testHaltedIsFixedPoint() public view { + assertEq( + this.stepStatus(stepLog, false, true), uint8(UArchStep.UArchStepStatus.UArchHalted) + ); + } + + function stepStatus(bytes calldata log, bool overflow, bool halted) + external + pure + returns (uint8) + { + StepLog.Context memory ctx = StepLog.decode(log); + if (overflow) StateAccess.writeCycle(ctx, EmulatorConstants.UARCH_CYCLE_MAX); + if (halted) StateAccess.writeHaltFlag(ctx, 1); + return uint8(UArchStep.uarchStep(ctx)); + } +} diff --git a/solidity-step/test/UarchStateMath.t.sol b/solidity-step/test/UarchStateMath.t.sol new file mode 100644 index 000000000..429f6ee80 --- /dev/null +++ b/solidity-step/test/UarchStateMath.t.sol @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {Test} from "forge-std/Test.sol"; + +import {StateAccess} from "src/StateAccess.sol"; + +/// Unit tests for the math helpers in StateAccess (the transpiler bridge +/// mirroring machine-emulator/src/uarch-solidity-compat.hpp). 
+contract UarchStateMathTest is Test { + int16 constant INT16_MAX = type(int16).max; + int32 constant INT32_MAX = type(int32).max; + int64 constant INT64_MAX = type(int64).max; + int16 constant INT16_MIN = type(int16).min; + int32 constant INT32_MIN = type(int32).min; + int64 constant INT64_MIN = type(int64).min; + uint16 constant UINT16_MAX = type(uint16).max; + uint32 constant UINT32_MAX = type(uint32).max; + uint64 constant UINT64_MAX = type(uint64).max; + + function testSanity() public pure { + assertEq(UINT16_MAX, 65535); + assertEq(UINT32_MAX, 4294967295); + assertEq(UINT64_MAX, 18446744073709551615); + assertEq(INT16_MAX, 32767); + assertEq(INT32_MAX, 2147483647); + assertEq(INT64_MAX, 9223372036854775807); + assertEq(INT16_MIN, -32768); + assertEq(INT32_MIN, -INT32_MAX - 1); + assertEq(INT64_MIN, -INT64_MAX - 1); + } + + function testUint64ToInt32() public pure { + assertEq(StateAccess.uint64ToInt32(1), 1); + assertEq(StateAccess.uint64ToInt32(0xffffffff), -1); + assertEq(StateAccess.uint64ToInt32(0xffffffff << 31), INT32_MIN); + assertEq(StateAccess.uint64ToInt32(0xffffffff << 32), 0); + } + + function testUint64AddInt32() public pure { + assertEq(StateAccess.uint64AddInt32(2, -1), 1); + assertEq(StateAccess.uint64AddInt32(0, -1), UINT64_MAX); + assertEq(StateAccess.uint64AddInt32(UINT64_MAX, 1), 0); + } + + function testUint64SubUint64() public pure { + assertEq(StateAccess.uint64SubUint64(1, 1), 0); + assertEq(StateAccess.uint64SubUint64(0, 1), UINT64_MAX); + } + + function testUint64AddUint64() public pure { + assertEq(StateAccess.uint64AddUint64(0, 1), 1); + assertEq(StateAccess.uint64AddUint64(UINT64_MAX, 1), 0); + } + + function testUint64ShiftRight() public pure { + assertEq(StateAccess.uint64ShiftRight(0, 0), 0); + assertEq(StateAccess.uint64ShiftRight(0, 1), 0); + assertEq(StateAccess.uint64ShiftRight(4, 1), 2); + assertEq(StateAccess.uint64ShiftRight(4, 2), 1); + assertEq(StateAccess.uint64ShiftRight(4, 3), 0); + 
assertEq(StateAccess.uint64ShiftRight(UINT64_MAX, 63), 1); + } + + function testUint64ShiftLeft() public pure { + assertEq(StateAccess.uint64ShiftLeft(0, 0), 0); + assertEq(StateAccess.uint64ShiftLeft(0, 1), 0); + assertEq(StateAccess.uint64ShiftLeft(4, 1), 8); + assertEq(StateAccess.uint64ShiftLeft(4, 2), 16); + assertEq(StateAccess.uint64ShiftLeft(UINT64_MAX, 63), 1 << 63); + } + + function testInt64ShiftRight() public pure { + assertEq(StateAccess.int64ShiftRight(0, 0), 0); + assertEq(StateAccess.int64ShiftRight(0, 1), 0); + assertEq(StateAccess.int64ShiftRight(4, 1), 2); + assertEq(StateAccess.int64ShiftRight(4, 2), 1); + assertEq(StateAccess.int64ShiftRight(4, 3), 0); + assertEq(StateAccess.int64ShiftRight(INT64_MAX, 62), 1); + assertEq(StateAccess.int64ShiftRight(INT64_MAX, 63), 0); + assertEq(StateAccess.int64ShiftRight(-1, 1), -1); + assertEq(StateAccess.int64ShiftRight(-4, 1), -2); + assertEq(StateAccess.int64ShiftRight(INT64_MIN, 62), -2); + assertEq(StateAccess.int64ShiftRight(INT64_MIN, 63), -1); + } + + function testInt64AddInt64() public pure { + assertEq(StateAccess.int64AddInt64(0, 0), 0); + assertEq(StateAccess.int64AddInt64(0, 1), 1); + assertEq(StateAccess.int64AddInt64(0, -1), -1); + assertEq(StateAccess.int64AddInt64(-1, 0), -1); + assertEq(StateAccess.int64AddInt64(INT64_MAX, 1), INT64_MIN); + assertEq(StateAccess.int64AddInt64(INT64_MAX, INT64_MAX), -2); + } + + function testUint32ShiftRight() public pure { + assertEq(StateAccess.uint32ShiftRight(0, 0), 0); + assertEq(StateAccess.uint32ShiftRight(0, 1), 0); + assertEq(StateAccess.uint32ShiftRight(4, 1), 2); + assertEq(StateAccess.uint32ShiftRight(4, 2), 1); + assertEq(StateAccess.uint32ShiftRight(4, 3), 0); + assertEq(StateAccess.uint32ShiftRight(UINT32_MAX, 31), 1); + } + + function testUint32ShiftLeft() public pure { + assertEq(StateAccess.uint32ShiftLeft(0, 0), 0); + assertEq(StateAccess.uint32ShiftLeft(0, 1), 0); + assertEq(StateAccess.uint32ShiftLeft(4, 1), 8); + 
assertEq(StateAccess.uint32ShiftLeft(4, 2), 16); + assertEq(StateAccess.uint32ShiftLeft(4, 3), 32); + assertEq(StateAccess.uint32ShiftLeft(UINT32_MAX, 31), 0x80000000); + } + + function testInt32ToUint64() public pure { + assertEq(StateAccess.int32ToUint64(1), 1); + assertEq(StateAccess.int32ToUint64(INT32_MAX), 2147483647); + assertEq(StateAccess.int32ToUint64(INT32_MIN), 0xffffffff80000000); + } + + function testInt32ShiftRight() public pure { + assertEq(StateAccess.int32ShiftRight(0, 0), 0); + assertEq(StateAccess.int32ShiftRight(0, 1), 0); + assertEq(StateAccess.int32ShiftRight(4, 1), 2); + assertEq(StateAccess.int32ShiftRight(4, 2), 1); + assertEq(StateAccess.int32ShiftRight(4, 3), 0); + assertEq(StateAccess.int32ShiftRight(INT32_MAX, 30), 1); + assertEq(StateAccess.int32ShiftRight(INT32_MAX, 31), 0); + assertEq(StateAccess.int32ShiftRight(-1, 1), -1); + assertEq(StateAccess.int32ShiftRight(-4, 1), -2); + assertEq(StateAccess.int32ShiftRight(INT32_MIN, 30), -2); + assertEq(StateAccess.int32ShiftRight(INT32_MIN, 31), -1); + } + + function testInt32AddInt32() public pure { + assertEq(StateAccess.int32AddInt32(0, 0), 0); + assertEq(StateAccess.int32AddInt32(0, 1), 1); + assertEq(StateAccess.int32AddInt32(0, -1), -1); + assertEq(StateAccess.int32AddInt32(-1, 0), -1); + assertEq(StateAccess.int32AddInt32(INT32_MAX, 1), INT32_MIN); + assertEq(StateAccess.int32AddInt32(INT32_MAX, INT32_MAX), -2); + } + + function testInt32SubInt32() public pure { + assertEq(StateAccess.int32SubInt32(1, 1), 0); + assertEq(StateAccess.int32SubInt32(1, 0), 1); + assertEq(StateAccess.int32SubInt32(0, 1), -1); + assertEq(StateAccess.int32SubInt32(-1, -1), 0); + assertEq(StateAccess.int32SubInt32(INT32_MIN, INT32_MAX), 1); + assertEq(StateAccess.int32SubInt32(INT32_MAX, INT32_MIN), -1); + } + + function testInt16ToUint64() public pure { + assertEq(StateAccess.int16ToUint64(1), 1); + assertEq(StateAccess.int16ToUint64(INT16_MAX), 32767); + assertEq(StateAccess.int16ToUint64(INT16_MIN), 
0xffffffffffff8000); + } + + function testInt8ToUint64() public pure { + assertEq(StateAccess.int8ToUint64(int8(1)), 1); + assertEq(StateAccess.int8ToUint64(int8(127)), 127); + assertEq(StateAccess.int8ToUint64(int8(-128)), 0xffffffffffffff80); + } + + function testUint32Log2() public pure { + assertEq(StateAccess.uint32Log2(1), 0); + assertEq(StateAccess.uint32Log2(2), 1); + assertEq(StateAccess.uint32Log2(3), 1); + assertEq(StateAccess.uint32Log2(4), 2); + assertEq(StateAccess.uint32Log2(5), 2); + assertEq(StateAccess.uint32Log2(0x7fffffff), 30); + assertEq(StateAccess.uint32Log2(0x80000000), 31); + assertEq(StateAccess.uint32Log2(0xffffffff), 31); + } + + function testUint32Log2OfZeroReverts() public { + vm.expectRevert(StateAccess.Uint32Log2OfZero.selector); + this.externalUint32Log2(0); + } + + /// External wrapper so vm.expectRevert sees a CALL boundary (internal library + /// calls inline, so the revert would otherwise hit the test method itself). + function externalUint32Log2(uint32 v) external pure returns (uint32) { + return StateAccess.uint32Log2(v); + } +} diff --git a/solidity-step/test/VerifySendCmioResponse.t.sol b/solidity-step/test/VerifySendCmioResponse.t.sol new file mode 100644 index 000000000..9d633ed59 --- /dev/null +++ b/solidity-step/test/VerifySendCmioResponse.t.sol @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: Apache-2.0 +pragma solidity ^0.8.30; + +import {console} from "forge-std/Test.sol"; + +import {StepLog} from "src/StepLog.sol"; +import {Verify} from "src/Verify.sol"; + +import {ManifestParser} from "./ManifestParser.sol"; + +/// Replays each send_cmio_response fixture row via Verify.verifySendCmioResponse, covering +/// sub-leaf through supra-page payload writes. The manifest carries the raw payload bytes +/// (column `data`); the verifier hashes it on-chain. 
+contract VerifySendCmioResponseTest is ManifestParser { + string constant CMIO_DIR = "test/fixtures/send-cmio-response"; + string constant MANIFEST_CSV = "test/fixtures/send-cmio-response/_manifest.csv"; + + struct ReplayArgs { + bytes32 rootHashBefore; + bytes32 rootHashAfter; + bytes32 revertRootHash; + uint16 reason; + } + + function testReplaysCmio() public { + Row[] memory rows = readManifestRows(MANIFEST_CSV, Kind.SendCmioResponse); + require(rows.length > 0, "expected at least one cmio row"); + for (uint256 i = 0; i < rows.length; i++) { + console.log("Replaying cmio:", rows[i].name); + bytes memory log = vm.readFileBinary(string.concat(CMIO_DIR, "/", rows[i].name)); + ReplayArgs memory args = ReplayArgs({ + rootHashBefore: rows[i].rootHashBefore, + rootHashAfter: rows[i].rootHashAfter, + revertRootHash: rows[i].revertRootHash, + reason: rows[i].reason + }); + this.replayCmio(log, args, rows[i].data); + } + } + + /// The no-op fixture (an advance-state response to a machine that rejected its previous input) + /// leaves the state unchanged, so its log verifies with equal root hashes before and after. + function testNoopIsIdentity() public { + Row[] memory rows = readManifestRows(MANIFEST_CSV, Kind.SendCmioResponse); + bool found; + for (uint256 i = 0; i < rows.length; i++) { + if (keccak256(bytes(rows[i].name)) == keccak256("send-cmio-response-noop.log")) { + assertEq( + rows[i].rootHashBefore, rows[i].rootHashAfter, "no-op must not change root hash" + ); + found = true; + } + } + assertTrue(found, "manifest has no send-cmio-response-noop.log row"); + } + + /// External self-call so `log` and `data` arrive as `bytes calldata` + /// (Verify.verifySendCmioResponse requires calldata for on-chain hashing). + /// Args bundled into a struct to stay under via-ir's stack budget. 
+    function replayCmio(bytes calldata log, ReplayArgs calldata args, bytes calldata data)
+        external
+        pure
+    {
+        StepLog.Context memory ctx = StepLog.decode(log);
+        Verify.verifySendCmioResponse(
+            ctx, args.rootHashBefore, args.reason, data, args.revertRootHash, args.rootHashAfter
+        );
+    }
+}
diff --git a/solidity-step/test/VerifyUarchReset.t.sol b/solidity-step/test/VerifyUarchReset.t.sol
new file mode 100644
index 000000000..2ae7117c9
--- /dev/null
+++ b/solidity-step/test/VerifyUarchReset.t.sol
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: Apache-2.0
+pragma solidity ^0.8.30;
+
+import {console} from "forge-std/Test.sol";
+
+import {StateAccess} from "src/StateAccess.sol";
+import {StepLog} from "src/StepLog.sol";
+import {Verify} from "src/Verify.sol";
+
+import {ManifestParser} from "./ManifestParser.sol";
+
+/// Replays the three reset_uarch fixture rows (plain, rejected, accepted) via Verify.verifyReset.
+contract VerifyUarchResetTest is ManifestParser {
+    string constant RESET_DIR = "test/fixtures/reset-uarch";
+    string constant MANIFEST_CSV = "test/fixtures/reset-uarch/_manifest.csv";
+
+    function testReplaysReset() public {
+        Row[] memory rows = readManifestRows(MANIFEST_CSV, Kind.ResetUarch);
+        require(rows.length == 3, "expected the plain, rejected, and accepted reset rows");
+        for (uint256 i = 0; i < rows.length; i++) {
+            console.log("Replaying reset:", rows[i].name);
+            bytes memory log = vm.readFileBinary(string.concat(RESET_DIR, "/", rows[i].name));
+            this.replayReset(log, rows[i]);
+        }
+    }
+
+    /// External self-call so `log` arrives as `bytes calldata` for StepLog.decode.
+    function replayReset(bytes calldata log, Row memory r) external pure {
+        StepLog.Context memory ctx = StepLog.decode(log);
+        // The reset reads iflags.Y, forcing page 0 (which holds the revert root hash slot) into this
+        // proof, so the revert hash round-trips off the witnessed page in every case.
+        require(
+            StateAccess.readRevertRootHash(ctx) == r.revertRootHash, "revert hash not in reset log"
+        );
+        // The rejected reset substitutes the revert hash (== rootHashAfter); the plain and accepted
+        // resets do not revert and check the recomputed tree root. All must agree with the manifest.
+        Verify.verifyReset(ctx, r.rootHashBefore, r.rootHashAfter);
+    }
+}
diff --git a/solidity-step/test/VerifyUarchTests.t.sol b/solidity-step/test/VerifyUarchTests.t.sol
new file mode 100644
index 000000000..d3fe7f520
--- /dev/null
+++ b/solidity-step/test/VerifyUarchTests.t.sol
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: Apache-2.0
+pragma solidity ^0.8.30;
+
+import {console} from "forge-std/Test.sol";
+
+import {StepLog} from "src/StepLog.sol";
+import {Verify} from "src/Verify.sol";
+
+import {ManifestParser} from "./ManifestParser.sol";
+
+/// Replays every rv64ui-uarch test program in `uarch-tests/_manifest.csv` via the
+/// Verify.* API. Each program log captures the whole test execution (hundreds of
+/// cycles per program). The recorder emits a fixture only for a passing test, so a
+/// replay reproducing the recorded post-state root is itself proof the test passed.
+contract VerifyUarchTestsTest is ManifestParser {
+    string constant UARCH_TESTS_DIR = "test/fixtures/uarch-tests";
+
+    /// Replays each program's batched log against the manifest roots. Fails loudly on an empty
+    /// manifest, which would otherwise let the loop pass without verifying anything.
+    function testReplaysAllUarchTests() public {
+        Row[] memory programs = readManifestRows(UARCH_TESTS_MANIFEST, Kind.Program);
+        require(programs.length > 0, "expected at least one uarch test program");
+        vm.pauseGasMetering();
+        for (uint256 i = 0; i < programs.length; i++) {
+            console.log("Replaying uarch test:", programs[i].name);
+            bytes memory log =
+                vm.readFileBinary(string.concat(UARCH_TESTS_DIR, "/", programs[i].name));
+            this.replayProgram(log, programs[i]);
+        }
+    }
+
+    /// External self-call so `log` arrives as `bytes calldata` for StepLog.decode.
+    function replayProgram(bytes calldata log, Row memory r) external pure {
+        StepLog.Context memory ctx = StepLog.decode(log);
+        Verify.verifyStep(ctx, r.rootHashBefore, r.requestedCycleCount, r.rootHashAfter);
+    }
+}
diff --git a/solidity-step/test/VerifyUarchTestsPerCycle.t.sol b/solidity-step/test/VerifyUarchTestsPerCycle.t.sol
new file mode 100644
index 000000000..c6e0c7149
--- /dev/null
+++ b/solidity-step/test/VerifyUarchTestsPerCycle.t.sol
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: Apache-2.0
+pragma solidity ^0.8.30;
+
+import {console} from "forge-std/Test.sol";
+
+import {StepLog} from "src/StepLog.sol";
+import {Verify} from "src/Verify.sol";
+
+import {ManifestParser} from "./ManifestParser.sol";
+
+/// Walks every rv64ui-uarch program (discovered via `uarch-tests/_manifest.csv`)
+/// and replays its per-cycle decomposition one cycle at a time via Verify.verifyStep
+/// -- exactly the production dispute shape (one step log per uarchStep call). The
+/// (before, after) values come from each cycle row in the per-program manifest,
+/// giving real (non-tautological) Layer 2 coverage at the per-cycle granularity.
+contract VerifyUarchTestsPerCycleTest is ManifestParser {
+    /// Replays every cycle row of every program listed in the uarch-tests manifest. An empty
+    /// program list is rejected up front so the nested loops cannot pass vacuously.
+    function testReplaysAllUarchTestsPerCycle() public {
+        Row[] memory programs = readManifestRows(UARCH_TESTS_MANIFEST, Kind.Program);
+        require(programs.length > 0, "expected at least one uarch test program");
+        vm.pauseGasMetering();
+        for (uint256 i = 0; i < programs.length; i++) {
+            // Program name in the manifest is the batched log filename (`foo.log`);
+            // the per-cycle subdirectory drops the .log suffix.
+            string memory programDir = vm.split(programs[i].name, ".log")[0];
+            console.log("Replaying per-cycle:", programDir);
+            Row[] memory cycleRows = readManifestRows(
+                string.concat(UARCH_PER_CYCLE_DIR, "/", programDir, "/_manifest.csv"), Kind.Cycle
+            );
+            for (uint256 j = 0; j < cycleRows.length; j++) {
+                bytes memory log = vm.readFileBinary(
+                    string.concat(UARCH_PER_CYCLE_DIR, "/", programDir, "/", cycleRows[j].name)
+                );
+                this.verifyOneCycle(
+                    log,
+                    cycleRows[j].rootHashBefore,
+                    cycleRows[j].requestedCycleCount,
+                    cycleRows[j].rootHashAfter
+                );
+            }
+        }
+    }
+
+    /// External self-call so `log` arrives as `bytes calldata` for StepLog.decode.
+    function verifyOneCycle(bytes calldata log, bytes32 before, uint64 cycleCount, bytes32 afterH)
+        external
+        pure
+    {
+        StepLog.Context memory ctx = StepLog.decode(log);
+        Verify.verifyStep(ctx, before, cycleCount, afterH);
+    }
+}
diff --git a/solidity-step/tools/gen-emulator-constants.lua b/solidity-step/tools/gen-emulator-constants.lua
new file mode 100644
index 000000000..dca8d0635
--- /dev/null
+++ b/solidity-step/tools/gen-emulator-constants.lua
@@ -0,0 +1,126 @@
+#!/usr/bin/env lua5.4
+
+-- Copyright Cartesi and individual authors (see AUTHORS)
+-- SPDX-License-Identifier: Apache-2.0
+--
+-- Generates solidity-step/src/EmulatorConstants.sol
+--
+
+local cartesi = require("cartesi")
+local util = require("cartesi.util")
+local machine = cartesi.machine
+
+local function hex(n) return string.format("0x%x", n) end
+
+local function hex_bytes(s) return "0x" ..
util.hexstring(s) end + +local function reg_addr(name) return machine:get_reg_address(name) end + +local UARCH_X_REG_COUNT = 32 +local SHADOW_TLB_SLOT_LOG2_SIZE = 5 +local TLB_SET_SIZE = 256 + +local SHADOW_TLB_SLOT_SIZE = 1 << SHADOW_TLB_SLOT_LOG2_SIZE +local SHADOW_TLB_SET_LENGTH = TLB_SET_SIZE * SHADOW_TLB_SLOT_SIZE + +assert( + cartesi.AR_SHADOW_TLB_LENGTH == 3 * SHADOW_TLB_SET_LENGTH, + "TLB layout drift: AR_SHADOW_TLB_LENGTH != 3 * SHADOW_TLB_SET_LENGTH" +) + +local lines = { +"// SPDX-License-Identifier: Apache-2.0", +"pragma solidity ^0.8.30;", +"", +"/// @notice Constants pulled from machine-emulator C++ headers.", +"/// @dev GENERATED by solidity-step/tools/gen-emulator-constants.lua - do not edit by hand.", +"/// Re-run via `make gen-constants`; CI fails on drift.", +"library EmulatorConstants {", +" // Address-range geometry (cartesi.AR_*).", +" uint64 internal constant AR_SHADOW_UARCH_STATE_START = " .. hex(cartesi.AR_SHADOW_UARCH_STATE_START) .. ";", +" uint64 internal constant AR_UARCH_RAM_START = " .. hex(cartesi.AR_UARCH_RAM_START) .. ";", +"", +" // Uarch register init values.", +" uint64 internal constant UARCH_PC_INIT = AR_UARCH_RAM_START;", +"", +" // Uarch shadow-state register addresses (cartesi.machine:get_reg_address).", +" uint64 internal constant UARCH_HALT_FLAG_ADDR = " .. hex(reg_addr("uarch_halt_flag")) .. ";", +" uint64 internal constant UARCH_CYCLE_ADDR = " .. hex(reg_addr("uarch_cycle")) .. ";", +" uint64 internal constant UARCH_PC_ADDR = " .. hex(reg_addr("uarch_pc")) .. ";", +" uint64 internal constant UARCH_X_BASE_ADDR = " .. hex(reg_addr("uarch_x0")) .. ";", +"", +" uint8 internal constant UARCH_X_REG_COUNT = " .. UARCH_X_REG_COUNT .. ";", +"", +" // Uarch cycle limit (cartesi.UARCH_CYCLE_MAX).", +" uint64 internal constant UARCH_CYCLE_MAX = " .. cartesi.UARCH_CYCLE_MAX .. ";", +"", +" // Uarch ECALL function codes (cartesi.UARCH_ECALL_FN_*).", +" uint64 internal constant UARCH_ECALL_FN_HALT = " .. cartesi.UARCH_ECALL_FN_HALT .. 
";", +" uint64 internal constant UARCH_ECALL_FN_PUTCHAR = " .. cartesi.UARCH_ECALL_FN_PUTCHAR .. ";", +" uint64 internal constant UARCH_ECALL_FN_WRITE_TLB = " .. cartesi.UARCH_ECALL_FN_WRITE_TLB .. ";", +"", +" // Uarch state geometry (cartesi.UARCH_STATE_LOG2_SIZE).", +" uint64 internal constant UARCH_STATE_START_ADDR = AR_SHADOW_UARCH_STATE_START;", +" uint8 internal constant UARCH_STATE_LOG2_SIZE = " .. cartesi.UARCH_STATE_LOG2_SIZE .. ";", +"", +" // Root hash of the pristine uarch state (cartesi.UARCH_PRISTINE_STATE_HASH).", +" bytes32 internal constant UARCH_PRISTINE_STATE_HASH =", +" " .. hex_bytes(cartesi.UARCH_PRISTINE_STATE_HASH) .. ";", +"", +" // Hash tree geometry (cartesi.HASH_TREE_LOG2_*).", +" uint8 internal constant HASH_TREE_LOG2_WORD_SIZE = " .. cartesi.HASH_TREE_LOG2_WORD_SIZE .. ";", +" uint8 internal constant HASH_TREE_LOG2_PAGE_SIZE = " .. cartesi.HASH_TREE_LOG2_PAGE_SIZE .. ";", +" uint8 internal constant HASH_TREE_LOG2_ROOT_SIZE = " .. cartesi.HASH_TREE_LOG2_ROOT_SIZE .. ";", +"", +" // Step log header hash_function codes (cartesi.HASH_FUNCTION_*). The Solidity", +" // verifier implements keccak256 only; sha256 is the zkVM path (RISC0).", +" uint8 internal constant HASH_FUNCTION_KECCAK256 = " .. cartesi.HASH_FUNCTION_KECCAK256 .. ";", +" uint8 internal constant HASH_FUNCTION_SHA256 = " .. cartesi.HASH_FUNCTION_SHA256 .. ";", +"", +" // Page geometry (derived).", +" uint256 internal constant PAGE_SIZE = 1 << HASH_TREE_LOG2_PAGE_SIZE;", +" uint64 internal constant PAGE_OFFSET_MASK = (uint64(1) << HASH_TREE_LOG2_PAGE_SIZE) - 1;", +" uint256 internal constant LEAF_SIZE = uint256(1) << HASH_TREE_LOG2_WORD_SIZE;", +"", +" // CMIO buffer geometry (cartesi.AR_CMIO_RX_BUFFER_*).", +" uint64 internal constant AR_CMIO_RX_BUFFER_START = " .. hex(cartesi.AR_CMIO_RX_BUFFER_START) .. ";", +" uint8 internal constant AR_CMIO_RX_BUFFER_LOG2_SIZE = " .. cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE .. 
";", +"", +" // Shadow TLB layout.", +" uint64 internal constant AR_SHADOW_TLB_START = " .. hex(cartesi.AR_SHADOW_TLB_START) .. ";", +" uint8 internal constant SHADOW_TLB_SLOT_LOG2_SIZE = " .. SHADOW_TLB_SLOT_LOG2_SIZE .. ";", +" uint64 internal constant SHADOW_TLB_SLOT_SIZE = uint64(1) << SHADOW_TLB_SLOT_LOG2_SIZE;", +" uint64 internal constant TLB_SET_SIZE = " .. TLB_SET_SIZE .. ";", +" uint64 internal constant SHADOW_TLB_SET_LENGTH = TLB_SET_SIZE * SHADOW_TLB_SLOT_SIZE;", +"", +" // Shadow register addresses (cartesi.machine:get_reg_address).", +" uint64 internal constant IFLAGS_Y_ADDRESS = " .. hex(reg_addr("iflags_Y")) .. ";", +" uint64 internal constant HTIF_TOHOST_ADDRESS = " .. hex(reg_addr("htif_tohost")) .. ";", +" uint64 internal constant HTIF_FROMHOST_ADDR = " .. hex(reg_addr("htif_fromhost")) .. ";", +"", +" // HTIF tohost field layout [dev:8][cmd:8][reason:16][data:32]; hand-mirrored from htif-constants.hpp.", +" uint32 internal constant HTIF_DEV_SHIFT = 0x38;", +" uint32 internal constant HTIF_CMD_SHIFT = 0x30;", +" uint32 internal constant HTIF_REASON_SHIFT = 0x20;", +" uint64 internal constant HTIF_DEV_MASK = 0xff00000000000000;", +" uint64 internal constant HTIF_CMD_MASK = 0xff000000000000;", +" uint64 internal constant HTIF_REASON_MASK = 0xffff00000000;", +" uint64 internal constant HTIF_DEV_YIELD = 0x2;", +" uint64 internal constant HTIF_YIELD_CMD_MANUAL = 0x1;", +" uint16 internal constant HTIF_YIELD_REASON_ADVANCE_STATE = " .. cartesi.HTIF_YIELD_REASON_ADVANCE_STATE .. ";", +"", +" // Shadow revert-root-hash slot - protocol-only convention; uarch never touches it.", +" uint64 internal constant AR_SHADOW_REVERT_ROOT_HASH_START = " .. hex(cartesi.AR_SHADOW_REVERT_ROOT_HASH_START) .. ";", +"", +" // Manual yield reasons (cartesi.HTIF_YIELD_MANUAL_REASON_*).", +" uint16 internal constant HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED = " .. cartesi.HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED .. 
";", +" uint16 internal constant HTIF_YIELD_MANUAL_REASON_RX_REJECTED = " .. cartesi.HTIF_YIELD_MANUAL_REASON_RX_REJECTED .. ";", +" uint16 internal constant HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION = " .. cartesi.HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION .. ";", +"", +" // Step log signature (cartesi.STEP_LOG_SIGNATURE).", +" bytes8 internal constant STEP_LOG_SIGNATURE = " .. hex_bytes(cartesi.STEP_LOG_SIGNATURE) .. ";", +"}", +"" +} + +io.write(table.concat(lines, "\n")) diff --git a/solidity-step/tools/test-transpile-uarch.lua b/solidity-step/tools/test-transpile-uarch.lua new file mode 100644 index 000000000..057f5d372 --- /dev/null +++ b/solidity-step/tools/test-transpile-uarch.lua @@ -0,0 +1,306 @@ +#!/usr/bin/env lua5.4 + +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: Apache-2.0 +-- +-- Golden-output unit tests for tools/transpile-uarch.lua. Each test feeds a +-- small synthetic C++ blob into the transpiler and asserts the output +-- (whitespace-normalised) matches a hand-written expected Solidity blob. +-- +-- Run from solidity-step/ via: lua5.4 tools/test-transpile-uarch.lua +-- The transpiler reads ../../src/uarch-solidity-compat.hpp and +-- src/EmulatorConstants.sol from disk to know which names to prefix, so the +-- test must be invoked with solidity-step/ as the working directory. + +package.path = "tools/?.lua;" .. package.path +local gen = require("transpile-uarch") + +local pass, fail = 0, 0 + +local function test(name, fn) + local ok, err = pcall(fn) + if ok then + pass = pass + 1 + else + fail = fail + 1 + io.stderr:write("FAIL: " .. name .. "\n " .. tostring(err) .. "\n") + end +end + +local function normalize(s) + -- Strip // comments and collapse whitespace so test assertions are robust + -- to formatting churn in the transpiler's output. 
+ s = s:gsub("//[^\n]*", "") + s = s:gsub("%s+", " ") + s = s:gsub("^ ", ""):gsub(" $", "") + return s +end + +local function assert_transpiles(input_cpp, input_h, lib, entry, expected_sol) + local result = gen.transpile(input_cpp, input_h, lib, entry) + local actual = normalize(result:sub(#gen.LICENSE_BANNER + 1)) + local expected = normalize(expected_sol) + assert(actual == expected, + "\nexpected:\n" .. expected .. "\n\ngot:\n" .. actual) +end + + +test("uarch-step: templates stripped, STATE access prefixed, UINT64_MAX rewritten", function() + local input_cpp = [==[ + // Copyright Cartesi and individual authors (see AUTHORS) + // SPDX-License-Identifier: LGPL-3.0-or-later + + #include "uarch-step.h" + #include "uarch-record-step-state-access.h" + #include "uarch-solidity-compat.h" + + // NOLINTBEGIN(google-readability-casting,misc-const-correctness) + namespace cartesi { + + template < typename UarchState > + static inline uint64 readUint64(const UarchState a, uint64 paddr) { + return readWord(a, paddr); + } + + template + UArchStepStatus uarch_step(const UarchState a) { + uint64 cycle = readCycle(a); + if (cycle >= UINT64_MAX) { + return UArchStepStatus::CycleOverflow; + } + } + + // Explicit instantiation for uarch_state_access + template UArchStepStatus uarch_step(const uarch_state_access a); + } + // NOLINTEND(google-readability-casting,misc-const-correctness) + ]==] + local input_h = "enum class UArchStepStatus : int { Success, CycleOverflow, UArchHalted };" + + local expected_sol = [=[ + pragma solidity ^0.8.30; + + import {StepLog} from "src/StepLog.sol"; + import {StateAccess} from "src/StateAccess.sol"; + import {EmulatorConstants} from "src/EmulatorConstants.sol"; + + library UArchStep { + enum UArchStepStatus { Success, CycleOverflow, UArchHalted } + + function readUint64(StepLog.Context memory a, uint64 paddr) private pure returns (uint64) { + return StateAccess.readWord(a, paddr); + } + + function uarchStep(StepLog.Context memory a) internal pure 
returns (UArchStepStatus) { + uint64 cycle = StateAccess.readCycle(a); + if (cycle >= type(uint64).max) { + return UArchStepStatus.CycleOverflow; + } + } + } + ]=] + + assert_transpiles(input_cpp, input_h, "UArchStep", "uarchStep", expected_sol) +end) + + +test("uarch-reset-state: 'UarchState &a' reference form, entrypoint rename", function() + local input_cpp = [==[ + // Copyright Cartesi and individual authors (see AUTHORS) + // SPDX-License-Identifier: LGPL-3.0-or-later + + namespace cartesi { + + template + void uarch_reset_state(UarchState &a) { + resetState(a); + } + + template void uarch_reset_state(uarch_state_access &a); + } + ]==] + + local expected_sol = [=[ + pragma solidity ^0.8.30; + + import {StepLog} from "src/StepLog.sol"; + import {StateAccess} from "src/StateAccess.sol"; + import {EmulatorConstants} from "src/EmulatorConstants.sol"; + + library UArchReset { + + function uarchResetState(StepLog.Context memory a) internal pure { + StateAccess.resetState(a); + } + } + ]=] + + assert_transpiles(input_cpp, nil, "UArchReset", "uarchResetState", expected_sol) +end) + + +test("send-cmio-response: bytes32 passes through, bytes data to calldata, constants prefixed", function() + local input_cpp = [==[ + // Copyright Cartesi and individual authors (see AUTHORS) + // SPDX-License-Identifier: LGPL-3.0-or-later + + namespace cartesi { + + template + void send_cmio_response(STATE_ACCESS a, bytes32 revertRootHash, uint16 reason, bytes data, uint32 dataLength) { + writeRevertRootHash(a, revertRootHash); + if (dataLength > 0) { + uint32 writeLengthLog2Size = uint32Log2(dataLength); + writeMemoryWithPadding(a, AR_CMIO_RX_BUFFER_START, data, dataLength, writeLengthLog2Size); + } + throwRuntimeError(a, "CMIO response data is too large"); + writeHtifFromhost(a, 0); + writeIflagsY(a, 0); + } + + template void send_cmio_response(state_access a, bytes32 revertRootHash, uint16_t reason, const unsigned char *data, uint32 length); + } + ]==] + + local expected_sol = [=[ + 
pragma solidity ^0.8.30; + + import {StepLog} from "src/StepLog.sol"; + import {StateAccess} from "src/StateAccess.sol"; + import {EmulatorConstants} from "src/EmulatorConstants.sol"; + + library SendCmioResponse { + + function sendCmioResponse(StepLog.Context memory a, bytes32 revertRootHash, uint16, bytes calldata data, uint32 dataLength) internal pure { + StateAccess.writeRevertRootHash(a, revertRootHash); + if (dataLength > 0) { + uint32 writeLengthLog2Size = StateAccess.uint32Log2(dataLength); + StateAccess.writeMemoryWithPadding(a, EmulatorConstants.AR_CMIO_RX_BUFFER_START, data, dataLength, writeLengthLog2Size); + } + StateAccess.throwRuntimeError(a, "CMIO response data is too large"); + StateAccess.writeHtifFromhost(a, 0); + StateAccess.writeIflagsY(a, 0); + } + } + ]=] + + assert_transpiles(input_cpp, nil, "SendCmioResponse", "sendCmioResponse", expected_sol) +end) + + +test("namespace body: a brace inside a string literal does not truncate the output", function() + local input_cpp = [==[ + namespace cartesi { + template + void f(const UarchState a) { + throwRuntimeError(a, "unbalanced } brace"); + writeIflagsY(a, 0); + } + } + ]==] + local expected_sol = [=[ + pragma solidity ^0.8.30; + + import {StepLog} from "src/StepLog.sol"; + import {StateAccess} from "src/StateAccess.sol"; + import {EmulatorConstants} from "src/EmulatorConstants.sol"; + + library UArchStep { + function f(StepLog.Context memory a) private pure { + StateAccess.throwRuntimeError(a, "unbalanced } brace"); + StateAccess.writeIflagsY(a, 0); + } + } + ]=] + assert_transpiles(input_cpp, nil, "UArchStep", "uarch_step", expected_sol) +end) + + +test("strip: a semicolon inside a string literal does not end the stripped statement early", function() + local input_cpp = [==[ + namespace cartesi { + static inline void g(const UarchState a) { + [[maybe_unused]] auto note = dumpInsn(a, "has ; semicolon"); + throwRuntimeError(a, "kept"); + } + } + ]==] + local expected_sol = [=[ + pragma solidity 
^0.8.30; + + import {StepLog} from "src/StepLog.sol"; + import {StateAccess} from "src/StateAccess.sol"; + import {EmulatorConstants} from "src/EmulatorConstants.sol"; + + library UArchStep { + function g(StepLog.Context memory a) private pure { + StateAccess.throwRuntimeError(a, "kept"); + } + } + ]=] + assert_transpiles(input_cpp, nil, "UArchStep", "uarch_step", expected_sol) +end) + + +test("params left unused after stripping a debug line get their names blanked", function() + -- dumpInsn is stripped via [[maybe_unused]]; insn and pc are used only there, so their + -- names are blanked while their types stay. a is used, so it keeps its name. + local input_cpp = [==[ + namespace cartesi { + static inline void executeEBREAK(const UarchState a, uint32 insn, uint64 pc) { + [[maybe_unused]] auto note = dumpInsn(a, pc, insn, "ebreak"); + throwRuntimeError(a, "uarch aborted"); + } + } + ]==] + local expected_sol = [=[ + pragma solidity ^0.8.30; + + import {StepLog} from "src/StepLog.sol"; + import {StateAccess} from "src/StateAccess.sol"; + import {EmulatorConstants} from "src/EmulatorConstants.sol"; + + library UArchStep { + function executeEBREAK(StepLog.Context memory a, uint32, uint64) private pure { + StateAccess.throwRuntimeError(a, "uarch aborted"); + } + } + ]=] + assert_transpiles(input_cpp, nil, "UArchStep", "uarch_step", expected_sol) +end) + + +local function assert_rejects(input_cpp, msg_substring) + local ok, err = pcall(gen.transpile, input_cpp, nil, "Lib", "entry") + assert(not ok, "expected transpile to reject the input, but it succeeded") + assert(tostring(err):find(msg_substring, 1, true), + "error did not mention '" .. msg_substring .. "':\n " .. 
tostring(err)) +end + + +test("dialect guard: rejects a raw shift on a non-literal operand", function() + assert_rejects([==[ + namespace cartesi { + template + uint64 f(UarchState a) { + return readWord(a) << 3; + } + } + ]==], "raw shift") +end) + + +test("dialect guard: rejects a native fixed-width C++ type", function() + assert_rejects([==[ + namespace cartesi { + template + void f(UarchState a) { + uint32_t x = readWord(a); + } + } + ]==], "native C++ type") +end) + + +print(string.format("\n%d passed, %d failed", pass, fail)) +if fail > 0 then os.exit(1) end diff --git a/solidity-step/tools/transpile-uarch.lua b/solidity-step/tools/transpile-uarch.lua new file mode 100644 index 000000000..a6c39df74 --- /dev/null +++ b/solidity-step/tools/transpile-uarch.lua @@ -0,0 +1,300 @@ +#!/usr/bin/env lua5.4 + +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: Apache-2.0 +-- +-- Transpiles machine-emulator's uarch C++ sources to Solidity. +-- Targets the binary step log replayer: produces code that operates on a +-- `StepLog.Context memory a` and calls bridge functions in `StateAccess`. + +local lpeg = require("lpeg") +local P, R, S, V, C, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cs + +local M = {} + +M.LICENSE_BANNER = [=[// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//]=] + +local ws = S(" \t") +local nl = P("\n") +local ws_nl = ws + nl +local ident = (R("az","AZ") + P("_")) * (R("az","AZ","09") + P("_"))^0 +-- Lexical tokens shared by the passes that must skip over strings and comments. +local string_literal = P('"') * (P('\\') * 1 + (1 - P('"')))^0 * P('"') +local line_comment = P("//") * (1 - nl)^0 +local block_comment = P("/*") * (1 - P("*/"))^0 * P("*/") + +local function extract_namespace_body(src) + -- Skip strings and comments so a brace inside one is not taken for a namespace delimiter. + local inner = (string_literal + line_comment + block_comment + (1 - S("{}")) + V(1))^0 + local pattern = (1 - P("namespace cartesi"))^0 + * P("namespace cartesi") * ws^0 + * P{ "{" * C(inner) * "}" } + return assert(pattern:match(src), "namespace cartesi not found") +end + +local function strip_cpp_only(src) + local rest_of_line = (1 - nl)^0 * nl + -- Skip strings and comments so a ';' inside one is not taken for the statement terminator. + local until_semicolon = (string_literal + line_comment + (1 - P(";")))^0 * P(";") * ws^0 * nl^-1 + local pattern = Cs(( + (ws^0 * P("template") * ws^0 * P("<") * (1 - P(">"))^0 * P(">") * ws^0) / "" + + (ws^0 * P("template") * until_semicolon) / "" + + (ws^0 * P("[[maybe_unused]]") * until_semicolon) / "" + + (ws^0 * P("(void)") * ws_nl^1 * P("note") * until_semicolon) / "" + + (ws^0 * P("// Explicit instantiat") * rest_of_line) / "" + + (P("constexpr") * ws^1) / "" + + 1 + )^0) + return pattern:match(src) +end + +local function convert_fn_signatures(src, entrypoint) + local static_inline = P("static") * ws^1 * P("inline") * ws^1 + local leading_ws = C(ws^0) + local return_type = C(ident) + local func_name = C(ident) + local param_list = P("(") * C((1 - P(")"))^0) * P(")") + local open_brace = P("{") + local fn_sig = leading_ws * static_inline^-1 + * return_type * ws^1 * func_name + * ws^0 * param_list + * ws^0 * open_brace + local rewrite = fn_sig / function(indent, ret_type, name, params) + -- 
C++ template-parameter accessor types to the concrete Solidity type. + -- Variable name `a` is preserved so the body's `readWord(a, ...)` calls still parse. + params = params:gsub("const%s+StateAccess%s+a", "StepLog.Context memory a") + params = params:gsub("StateAccess%s+&a", "StepLog.Context memory a") + params = params:gsub("const%s+UarchState%s+a", "StepLog.Context memory a") + params = params:gsub("UarchState%s+&a", "StepLog.Context memory a") + params = params:gsub("STATE_ACCESS%s+a", "StepLog.Context memory a") + -- send_cmio_response: rewrite the C++ `bytes data` / `const unsigned char *data` + -- parameter to Solidity calldata bytes; the verifier computes the padded-data + -- Merkle hash on-chain. + params = params:gsub("bytes%s+data", "bytes calldata data") + params = params:gsub("const%s+unsigned%s+char%s+%*data", "bytes calldata data") + -- Rename only the public entry points; internal helpers keep their C++ names + -- so grep-trace from Solidity back to C++ works. + local fn_renames = { + uarch_step = "uarchStep", + uarch_reset_state = "uarchResetState", + send_cmio_response = "sendCmioResponse", + } + name = fn_renames[name] or name + local visibility = name == entrypoint and "internal" or "private" + local ret = ret_type ~= "void" and (" returns (" .. ret_type .. ")") or "" + return indent .. "function " .. name .. "(" .. params .. ") " .. visibility .. " pure" .. ret .. 
" {" + end + return Cs((rewrite + 1)^0):match(src) +end + +local function transform_code(src, fn) + local not_string_or_comment = (1 - string_literal - line_comment)^1 + local code = C(not_string_or_comment) / fn + return Cs((string_literal + line_comment + code)^0):match(src) +end + +local function cpp_to_solidity_syntax(src) + return transform_code(src, function(code) + code = code:gsub("const%s+(u?int%d+)", "%1") + code = code:gsub("::", ".") + code = code:gsub("UINT64_MAX", "type(uint64).max") + return code + end) +end + +-- The transpiler does only mechanical rewrites; it does NOT translate shift width, +-- integer width, or signedness. The uarch C++ sources are written in a Solidity- +-- compatible dialect where those operations go through uarch-solidity-compat.hpp +-- helpers (uint32ShiftLeft, int32ShiftRight, ...) and use the intN/uintN aliases. +-- This guard fails transpilation if a raw shift on a non-literal operand, or a native +-- fixed-width C++ type (intN_t/uintN_t), reaches the body: either would silently emit +-- Solidity whose overflow/sign/width semantics diverge from the C++/RISC0 replayers. +-- Constant-only shifts in decode masks, e.g. (0x7f << 25), are allowed because both +-- operands are literals. +local function assert_solidity_dialect(body) + local function is_int_literal(tok) + return tok ~= "" and (tok:match("^%d+$") ~= nil or tok:match("^0[xX]%x+$") ~= nil) + end + transform_code(body, function(code) + local native = code:match("%f[%w]u?int%d+_t%f[%W]") + if native then + error("transpile dialect guard: native C++ type '" .. native + .. "' reached the transpiled body. Use the Solidity-dialect alias (intN/uintN) " + .. "routed through uarch-solidity-compat.hpp, not a native " .. native .. ".") + end + for lhs, op, rhs in code:gmatch("([%w_]*)%s*([<>][<>])%s*([%w_]*)") do + if (op == "<<" or op == ">>") and not (is_int_literal(lhs) and is_int_literal(rhs)) then + error("transpile dialect guard: raw shift '" + .. 
(lhs == "" and "" or lhs) .. " " .. op .. " " + .. (rhs == "" and "" or rhs) + .. "' on a non-literal operand. The transpiler does not translate shift " + .. "width/semantics; route variable shifts through a uarch-solidity-compat.hpp " + .. "helper (e.g. uint32ShiftLeft). Only constant decode masks like (0x7f << 25) " + .. "are allowed.") + end + end + return code + end) +end + +local function prefix_names(src, names, prefix) + return transform_code(src, function(code) + for name in pairs(names) do + code = code:gsub("%f[%w]" .. name .. "%f[%W]", prefix .. name) + end + return code + end) +end + +local function extract_names(src, pattern) + local t = {} + for name in src:gmatch(pattern) do t[name] = true end + return t +end + +-- Blank parameter names that are unused in the body, to avoid "unused parameter" warnings in Solidity. +local function blank_unused_param_names(src) + local body = P{ "{" * (string_literal + line_comment + block_comment + (1 - S("{}")) + V(1))^0 * "}" } + local fn = C(P("function") * ws_nl^1 * ident * ws_nl^0 * P("(")) -- prefix incl '(' + * C((1 - P(")"))^0) -- params + * C(P(")") * (1 - P("{"))^0) -- ') visibility pure ...' + * C(body) + + local function strip_noncode(s) + return (s:gsub("/%*.-%*/", " "):gsub("//[^\n]*", " "):gsub('".-"', " ")) + end + + local function rewrite(prefix, params, suffix, fn_body) + local code = strip_noncode(fn_body) + local kept = {} -- params to keep, after dropping unused names + for param in (params .. ","):gmatch("%s*(.-)%s*,") do + if param ~= "" then + local without_name, name = param:match("^(.*%S)%s+([%w_]+)$") + if name and not code:find("%f[%w]" .. name .. "%f[%W]") then + kept[#kept + 1] = without_name -- drop the unused name, keep the type + else + kept[#kept + 1] = param + end + end + end + return prefix .. table.concat(kept, ", ") .. suffix .. 
fn_body + end + + return Cs((fn / rewrite + 1)^0):match(src) +end + +local script_dir = debug.getinfo(1, "S").source:match("^@(.*/)") or "./" +local emulator_src_dir = script_dir .. "../../src/" +local solidity_src_dir = script_dir .. "../src/" + +local function read_file(path) + local f = assert(io.open(path, "r")) + return f:read("a") +end + +function M.transpile(cpp_src, h_src, lib_name, entrypoint) + local body = extract_namespace_body(cpp_src) + body = strip_cpp_only(body) + body = convert_fn_signatures(body, entrypoint) + body = cpp_to_solidity_syntax(body) + assert_solidity_dialect(body) + + -- Bridge function names live in uarch-solidity-compat.hpp on the emulator side. + -- Match `static inline RETURN NAME(...)` to pick out the function names. + local compat_src = read_file(emulator_src_dir .. "uarch-solidity-compat.hpp") + local compat_fn_names = extract_names( + compat_src, + "static%s+inline%s+[%w_]+%s+([%w_]+)%s*%(" + ) + body = prefix_names(body, compat_fn_names, "StateAccess.") + + -- Constants live in solidity-step/src/EmulatorConstants.sol. + local constants_names = extract_names( + read_file(solidity_src_dir .. "EmulatorConstants.sol"), + "constant%s+([%w_]+)" + ) + body = prefix_names(body, constants_names, "EmulatorConstants.") + + body = blank_unused_param_names(body) + + -- Extract enums from the companion header (e.g. UArchStepStatus). + local enums = "" + if h_src then + for ename, ebody in h_src:gmatch("enum class (%w+)%s*:%s*[%w_]+%s*{([^}]*)}") do + enums = enums .. " enum " .. ename .. " {" .. ebody .. "}\n" + end + end + + return M.LICENSE_BANNER .. "\n\n" + .. "/// @dev This file is generated from C++ by solidity-step/tools/transpile-uarch.lua\n\n" + .. "pragma solidity ^0.8.30;\n\n" + .. "import {StepLog} from \"src/StepLog.sol\";\n" + .. "import {StateAccess} from \"src/StateAccess.sol\";\n" + .. "import {EmulatorConstants} from \"src/EmulatorConstants.sol\";\n\n" + .. "library " .. lib_name .. " {\n" + .. enums .. body .. "\n" + .. 
"}\n" +end + +local function help() + io.stderr:write(string.format([=[ +Usage: + + %s + +Transpile a C++ uarch source file into a Solidity library. + +Arguments: + + input-cpp C++ source file to transpile (e.g. ../src/uarch-step.cpp). + If a companion .hpp or .h exists, enums are extracted from it. + output-sol Output .sol file path (e.g. src/UArchStep.sol) + library-name Solidity library name (e.g. UArchStep) + entry-function The library's public entry point (e.g. uarchStep). + Gets "internal" visibility; all others get "private". + +]=], arg[0])) + os.exit() +end + +local function main() + if not arg[1] or arg[1] == "-h" or arg[1] == "--help" then help() end + local input_cpp = arg[1] + local output_sol = assert(arg[2], "missing output-sol") + local library_name = assert(arg[3], "missing library-name") + local entry_function = assert(arg[4], "missing entry-function") + + local input_cpp_src = read_file(input_cpp) + local input_h_src + for _, ext in ipairs({".hpp", ".h"}) do + local h_path = input_cpp:gsub("%.cpp$", ext) + local ok, src = pcall(read_file, h_path) + if ok then input_h_src = src; break end + end + + local result = M.transpile(input_cpp_src, input_h_src, library_name, entry_function) + + local f = assert(io.open(output_sol, "w")) + f:write(result) + io.stderr:write("Generated " .. output_sol .. "\n") +end + +if arg and arg[0] and arg[0]:match("/transpile%-uarch%.lua$") then + main() +end + +return M diff --git a/src/access-log.hpp b/src/access-log.hpp deleted file mode 100644 index dc934ba50..000000000 --- a/src/access-log.hpp +++ /dev/null @@ -1,364 +0,0 @@ -// Copyright Cartesi and individual authors (see AUTHORS) -// SPDX-License-Identifier: LGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation, either version 3 of the License, or (at your option) any -// later version. 
-// -// This program is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License along -// with this program (see COPYING). If not, see . -// - -#ifndef ACCESS_LOG_HPP -#define ACCESS_LOG_HPP - -/// \file -/// \brief State access log implementation - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "assert-printf.hpp" -#include "bracket-note.hpp" -#include "cm.h" -#include "hash-tree-constants.hpp" -#include "hash-tree.hpp" -#include "machine-hash.hpp" -#include "strict-aliasing.hpp" - -namespace cartesi { - -/// \brief Type of state access -enum class access_type { - read, ///< Read operation - write, ///< Write operation -}; - -using access_data = boost::container::small_vector; - -static inline void set_word_access_data(uint64_t w, access_data &ad) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - auto *p = reinterpret_cast(&w); - ad.clear(); - ad.insert(ad.end(), p, p + sizeof(w)); -} - -static inline void replace_word_access_data(uint64_t w, access_data &ad, uint64_t offset = 0) { - assert(ad.size() >= offset + sizeof(uint64_t)); - aliased_aligned_write(ad.data() + offset, w); -} - -static inline uint64_t get_word_access_data(const access_data &ad, uint64_t offset = 0) { - assert(ad.size() >= offset + sizeof(uint64_t)); - return aliased_aligned_read(ad.data() + offset); -} - -/// \brief Records an access to the machine state -class access { - -public: - using proof_type = hash_tree::proof_type; - using sibling_hashes_type = hash_tree::sibling_hashes_type; - - void set_type(access_type type) { - m_type = type; - } - access_type get_type() const { - return m_type; - } - - /// \brief Sets log2 of size of access. 
- /// \param log2_size New log2 of size of access. - void set_log2_size(int log2_size) { - m_log2_size = log2_size; - } - - /// \brief Gets log2 of size of access. - /// \returns log2 of size. - int get_log2_size() const { - return m_log2_size; - } - - /// \brief Sets address of access. - /// \param address New address. - void set_address(uint64_t address) { - m_address = address; - } - - /// \brief Gets address of access. - /// \returns Address. - uint64_t get_address() const { - return m_address; - } - - /// \brief Sets data that can be read at address before access. - /// \param read Data at address. - void set_read(const access_data &read) { - m_read = read; - } - void set_read(access_data &&read) { - m_read = std::move(read); - } - - /// \brief Gets data that can be read at address before access. - /// \returns Data at address. - const std::optional &get_read() const { - return m_read; - } - std::optional &get_read() { - return m_read; - } - - /// \brief Sets data that was written at address after access. - /// \param written New data at address. - void set_written(const access_data &written) { - m_written = written; - } - void set_written(access_data &&written) { - m_written = std::move(written); - } - - /// \brief Gets data that was written at address after access. - /// \returns Data at address. - const std::optional &get_written() const { - return m_written; - } - std::optional &get_written() { - return m_written; - } - - /// \brief Sets hash of data that was written at address after access. - /// \param hash Hash of new data at address. - void set_written_hash(const machine_hash &hash) { - m_written_hash = hash; - } - - /// \brief Gets hash of data that was written at address after access. - /// \returns Hash of written data at address. - const std::optional &get_written_hash() const { - return m_written_hash; - } - std::optional &get_written_hash() { - return m_written_hash; - } - - /// \brief Sets hash of data that can be read at address before access. 
- /// \param hash Hash of data at address. - void set_read_hash(const machine_hash &hash) { - m_read_hash = hash; - } - - /// \brief Gets hash of data that can be read at address before access. - /// \returns Hash of data at address. - const machine_hash &get_read_hash() const { - return m_read_hash; - } - machine_hash &get_read_hash() { - return m_read_hash; - } - - /// \brief Constructs a proof using this access' data and a given root hash. - /// \param root_hash Hash to be used as the root of the proof. - /// \return The corresponding proof - proof_type make_proof(const machine_hash root_hash) const { - // the access can be of data smaller than the hash tree word size - // however, the proof must be at least as big as the hash tree word size - const int proof_log2_size = std::max(m_log2_size, HASH_TREE_LOG2_WORD_SIZE); - // the proof address is the access address aligned to the hash tree word size - const uint64_t proof_address = m_address & ~(HASH_TREE_WORD_SIZE - 1); - if (!m_sibling_hashes.has_value()) { - throw std::runtime_error("can't make proof if access doesn't have sibling hashes"); - } - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - const auto &sibling_hashes = m_sibling_hashes.value(); - const int log2_root_size = proof_log2_size + static_cast(sibling_hashes.size()); - if (m_read.has_value() && m_read.value().size() != (static_cast(1) << proof_log2_size)) { - throw std::runtime_error("access read data size is inconsistent with proof size"); - } - if (m_written.has_value() && m_written.value().size() != (static_cast(1) << proof_log2_size)) { - throw std::runtime_error("access written data size is inconsistent with proof size"); - } - proof_type proof(log2_root_size, proof_log2_size); - proof.set_root_hash(root_hash); - proof.set_target_address(proof_address); - proof.set_target_hash(m_read_hash); - for (int log2_size = proof_log2_size; log2_size < log2_root_size; log2_size++) { - proof.set_sibling_hash(sibling_hashes[log2_size - 
proof_log2_size], log2_size); - } - return proof; - } - - std::optional &get_sibling_hashes() { - return m_sibling_hashes; - } - const std::optional &get_sibling_hashes() const { - return m_sibling_hashes; - } - - void set_sibling_hashes(const sibling_hashes_type &sibling_hashes) { - m_sibling_hashes = sibling_hashes; - } - -private: - access_type m_type{0}; ///< Type of access - uint64_t m_address{0}; ///< Address of access - int m_log2_size{0}; ///< Log2 of size of access - std::optional m_read; ///< Data before access - machine_hash m_read_hash{}; ///< Hash of data before access - std::optional m_written; ///< Written data - std::optional m_written_hash; ///< Hash of written data - std::optional m_sibling_hashes; ///< Hashes of siblings in path from address to root -}; - -/// \brief Log of state accesses -class access_log { -public: - /// \brief Type of access log - class type { - bool m_annotations; ///< Includes annotations - bool m_large_data; ///< Includes data bigger than 8 bytes - public: - /// \brief Default constructor - /// \param annotations Include annotations (default false) - /// \param large_data Include large data (default false) - explicit type(bool annotations = false, bool large_data = false) : - m_annotations(annotations), - m_large_data(large_data) { - ; - } - explicit type(int log_type) : - m_annotations(static_cast(log_type & CM_ACCESS_LOG_TYPE_ANNOTATIONS)), - m_large_data(static_cast(log_type & CM_ACCESS_LOG_TYPE_LARGE_DATA)) { - ; - } - - /// \brief Returns whether log includes annotations - bool has_annotations() const { - return m_annotations; - } - - /// \brief Returns whether log includes data bigger than 8 bytes - bool has_large_data() const { - return m_large_data; - } - }; - -private: - std::vector m_accesses; ///< List of all accesses - std::vector m_brackets; ///< Begin/End annotations - std::vector m_notes; ///< Per-access annotations - type m_log_type; ///< Log type - std::vector::size_type m_outstanding_ends; ///< Number of 
outstanding unmatched end brackets - -public: - explicit access_log(type log_type) : m_log_type(log_type), m_outstanding_ends{0} { - ; - } - - template - access_log(ACCESSES &&accesses, BRACKETS &&brackets, NOTES &¬es, type log_type) : - m_accesses(std::forward(accesses)), - m_brackets(std::forward(brackets)), - m_notes(std::forward(notes)), - m_log_type(log_type), - m_outstanding_ends(0) { - for (const auto &b : m_brackets) { - if (b.type == bracket_type::begin) { - ++m_outstanding_ends; - } - if (b.type == bracket_type::end && m_outstanding_ends > 0) { - --m_outstanding_ends; - } - }; - } - - /// \brief Clear the log - void clear() { - m_accesses.clear(); - m_notes.clear(); - m_brackets.clear(); - m_outstanding_ends = 0; - } - - /// \brief Adds a bracket annotation to the log (if the log type includes annotations) - /// \param type Bracket type - /// \param text Annotation contents - void push_begin_bracket(const char *text) { - if (m_log_type.has_annotations()) { - // Increment number of outstanding end brackets we are expecting - ++m_outstanding_ends; - // Make sure we have room for the matching end bracket as well. - // That way, unless the user is messing with unbalanced brackets, there is no way we - // would throw an exception for lack of memory on the matching end bracket - m_brackets.push_back(bracket_note{.type = bracket_type::begin, .where = m_accesses.size(), .text = text}); - m_brackets.reserve(m_brackets.size() + m_outstanding_ends); - } - } - - void push_end_bracket(const char *text) noexcept { - if (m_log_type.has_annotations()) { - // If we failed to push, it was because the system is completely screwed anyway *and* the - // user is using unbalanced brackets. Therefore, it's OK to quietly ignore the error. - try { - m_brackets.push_back(bracket_note{.type = bracket_type::end, .where = m_accesses.size(), .text = text}); - } catch (...) 
{ // NOLINT(bugprone-empty-catch) - } - // Decrement number of outstanding end brackets we are expecting - if (m_outstanding_ends > 0) { - --m_outstanding_ends; - } - } - } - - /// \brief Adds a new access to the log - /// \tparam A Type of access - /// \param a Access object - /// \param text Annotation contents (added if the log type includes annotations, ignored otherwise) - template - void push_access(A &&a, const char *text) { - m_accesses.push_back(std::forward(a)); - if (m_log_type.has_annotations()) { - m_notes.emplace_back(text); - } - } - - /// \brief Returns the array of notes - /// \return Constant reference to array - const std::vector &get_notes() const { - return m_notes; - } - - /// \brief Returns the array of accesses - /// \return Constant reference to array - const std::vector &get_accesses() const { - return m_accesses; - } - - /// \brief Returns the array of brackets - /// \return Constant reference to array - const std::vector &get_brackets() const { - return m_brackets; - } - - /// \brief Returns the log type - /// \return Log type - type get_log_type() const { - return m_log_type; - } -}; - -} // namespace cartesi - -#endif diff --git a/src/bracket-note.hpp b/src/bracket-note.hpp deleted file mode 100644 index 79328aeef..000000000 --- a/src/bracket-note.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright Cartesi and individual authors (see AUTHORS) -// SPDX-License-Identifier: LGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation, either version 3 of the License, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
-// -// You should have received a copy of the GNU Lesser General Public License along -// with this program (see COPYING). If not, see . -// - -#ifndef BRACKET_NOTE_HPP -#define BRACKET_NOTE_HPP - -#include -#include - -/// \file -/// \brief Bracket annotation for access log - -namespace cartesi { - -/// \brief Bracket type -enum class bracket_type { - begin, ///< Start of scope - end ///< End of scope -}; - -/// \brief Bracket note -struct bracket_note { - bracket_type type{bracket_type::begin}; ///< Bracket type - uint64_t where{0}; ///< Where it points to in the log - std::string text; ///< Note text -}; - -} // namespace cartesi - -#endif diff --git a/src/cartesi-machine.lua b/src/cartesi-machine.lua index 0205750d4..af9c896e8 100755 --- a/src/cartesi-machine.lua +++ b/src/cartesi-machine.lua @@ -603,11 +603,37 @@ where options are: --log-step=, log and save a step of mcycles to . - --log-step-uarch - advance one micro step and print access log. + --log-step-uarch=,[,pretty[=]] + log microarchitecture cycles to as a binary + step log. logging stops early at the uarch halt, so a count at or above the + per-mcycle uarch budget records one whole mcycle. + append ",pretty" to also write a human-readable printout to stderr, + or ",pretty=" to write that printout to file . - --log-reset-uarch - reset the microarchitecture state and print the access log. + --log-reset-uarch= + reset the microarchitecture state and write a binary step log to . + + --log-send-cmio-response=:[,:[,...]...] + send a cmio response to the rx buffer and write a binary step log to a file. + runs after the machine has reached its terminal state. The machine should be + in a yielded state (iflags.Y == 1); otherwise the logged transition is a no-op. + + : is one of + reason: + filename: + file: + hex:<0x...> + str: + + reason (required) + the cmio yield reason (e.g., a HTIF_YIELD_*_REASON_* constant). + + filename (required) + path of the step log file to write. 
{
    "^%-%-log%-step%-uarch%=(.*)$",
    -- Handler for --log-step-uarch.
    -- The option value is split at the first comma into a cycle count and the rest.
    -- The rest is either a bare log filename or a filename followed by ",pretty"
    -- (printout to stderr) or ",pretty=" plus an output path for the printout.
    -- Results are stored in log_step_uarch_cycles, log_step_uarch_filename and
    -- log_step_uarch_pretty ("" means stderr, nil means no printout).
    -- Returns false when the argument is not this option, true once it was consumed.
    function(value)
        if not value then return false end
        local count, rest = value:match("^(.-),(.*)$")
        assert(count, "--log-step-uarch expects ,[,pretty[=]]")
        log_step_uarch_cycles =
            assert(util.parse_number(count), "invalid --log-step-uarch cycle count '" .. count .. "'")
        -- Capture the "=" separately so ",pretty" and ",pretty=..." can be told apart from
        -- look-alikes such as ",prettyfoo", which would otherwise be taken as an output path.
        local bin, eq, pretty = rest:match("^(.-),pretty(=?)(.*)$")
        if bin then
            assert(
                eq == "=" or pretty == "",
                "unrecognized --log-step-uarch sub-option (expected ,pretty[=])"
            )
            log_step_uarch_filename = bin
            log_step_uarch_pretty = pretty -- "" means stderr, otherwise an output path
        else
            assert(not rest:find(","), "unrecognized --log-step-uarch sub-option (expected ,pretty[=])")
            log_step_uarch_filename = rest
        end
        -- An empty string is truthy in Lua and would only fail later when the log is written.
        assert(log_step_uarch_filename ~= "", "missing filename for --log-step-uarch")
        return true
    end,
},
{
    "^%-%-log%-reset%-uarch%=(.*)$",
    -- Handler for --log-reset-uarch: stores the step log filename in log_reset_uarch_filename.
    -- Returns false when the argument is not this option, true once it was consumed.
    function(filename)
        if not filename then return false end
        -- Fail at parse time instead of passing an empty path on to the machine.
        assert(filename ~= "", "missing filename for --log-reset-uarch")
        log_reset_uarch_filename = filename
        return true
    end,
},
{
    "^(%-%-log%-send%-cmio%-response%=(.+))$",
    -- Handler for --log-send-cmio-response: parses the key:value list into a table and
    -- stores it in log_send_cmio_response_opts. "reason" and "filename" are required, and
    -- exactly one of "file", "hex" or "str" must be given as the source of the response bytes.
    function(all, opts)
        -- The sibling handlers bail out on nil captures, which indicates handlers are also
        -- invoked for arguments that did not match their pattern; do the same here so that
        -- util.parse_options is never called with nil.
        if not opts then return false end
        local o = util.parse_options(opts, all, {
            reason = "number",
            filename = "string",
            file = "string",
            hex = "string",
            str = "string",
        })
        assert(o.reason, "missing reason for --log-send-cmio-response")
        assert(o.filename, "missing filename for --log-send-cmio-response")
        local sources = (o.file and 1 or 0) + (o.hex and 1 or 0) + (o.str and 1 or 0)
        assert(sources == 1, "--log-send-cmio-response requires exactly one of file:, hex:, str:")
        log_send_cmio_response_opts = o
        return true
    end,
},
-- Reads the whole file named by inspect.query (opened in binary mode) and sends its
-- contents to the machine as a cmio response with reason HTIF_YIELD_REASON_INSPECT_STATE.
-- The machine's current root hash is passed as the first argument of send_cmio_response.
-- Raises an error if the file cannot be opened or read.
local function load_cmio_query(machine, inspect)
    local f = assert(io.open(inspect.query, "rb"))
    local data, read_err = f:read("*a")
    -- Close before checking the read result so the handle is released even when the read fails.
    f:close()
    assert(data, read_err)
    machine:send_cmio_response(machine:get_root_hash(), cartesi.HTIF_YIELD_REASON_INSPECT_STATE, data)
end
-- Post-run logging. Each section runs only if its command-line option was given.

-- --log-step-uarch: log the requested number of uarch cycles to the given file and,
-- if requested, emit the printout of that log file.
if log_step_uarch_filename then
    assert(config.processor.registers.iunrep == 0, "micro step proof is meaningless in unreproducible mode")
    stderr("Gathering micro step log: please wait\n")
    machine:log_step_uarch(log_step_uarch_cycles, log_step_uarch_filename)
    if log_step_uarch_pretty then
        local printout = cartesi.machine:pretty_print_step_uarch(log_step_uarch_filename)
        -- "" routes the printout to stderr; otherwise it goes to the given file.
        if log_step_uarch_pretty ~= "" then
            local out = assert(io.open(log_step_uarch_pretty, "w"))
            local written, write_err = out:write(printout)
            -- Always close the handle; close also flushes, so its result is checked as well.
            local closed, close_err = out:close()
            assert(written, write_err)
            assert(closed, close_err)
        else
            io.stderr:write(printout)
        end
    end
end

-- --log-reset-uarch: reset the uarch state, logging to the given file.
if log_reset_uarch_filename then
    stderr("Resetting microarchitecture state: please wait\n")
    machine:log_reset_uarch(log_reset_uarch_filename)
end

-- --log-send-cmio-response: build the response bytes from exactly one source
-- (file, hex or str) and log sending them to the machine.
if log_send_cmio_response_opts then
    local o = log_send_cmio_response_opts
    local data
    if o.file then
        local f = assert(io.open(o.file, "rb"))
        local contents, read_err = f:read("*a")
        -- Release the handle before checking the read result.
        f:close()
        data = assert(contents, read_err)
    elseif o.hex then
        assert(o.hex:sub(1, 2) == "0x" and #o.hex % 2 == 0, "hex must be 0x-prefixed with even length")
        local digits = o.hex:sub(3)
        -- Without this check, non-hex characters would be left in the payload unconverted.
        assert(digits:match("^%x*$"), "hex must contain only hexadecimal digits after the 0x prefix")
        -- Parentheses drop gsub's second result (the substitution count).
        data = (digits:gsub("(%x%x)", function(c) return string.char(tonumber(c, 16)) end))
    else
        data = o.str
    end
    stderr("Logging cmio response: please wait\n")
    machine:log_send_cmio_response(machine:get_root_hash(), o.reason, data, o.filename)
end
f:write(string.rep(" ", indent), string.format(fmt, ...)) end @@ -54,90 +52,6 @@ end _M.dump_json_proof = dump_json_proof -local function dump_json_log_notes(notes, out, indent) - local n = #notes - for i, note in ipairs(notes) do - indentout(out, indent, '"%s"', note) - if i < n then - out:write(",\n") - else - out:write("\n") - end - end -end - -local function dump_json_log_brackets(brackets, out, indent) - local n = #brackets - for i, bracket in ipairs(brackets) do - indentout(out, indent, "{\n") - indentout(out, indent + 1, '"type": "%s",\n', bracket.type) - indentout(out, indent + 1, '"where": %u,\n', bracket.where) - indentout(out, indent + 1, '"text": "%s"\n', bracket.text) - indentout(out, indent, "}") - if i < n then - out:write(",\n") - else - out:write("\n") - end - end -end - -local function dump_json_log_access(access, out, indent) - indentout(out, indent, "{\n") - indentout(out, indent + 1, '"type": "%s",\n', access.type) - indentout(out, indent + 1, '"address": %u,\n', access.address) - indentout(out, indent + 1, '"read": "%s"', hexstring(access.read)) - if access.type == "write" then - out:write(",\n") - indentout(out, indent + 1, '"written": "%s"', hexstring(access.written)) - end - if access.proof then - out:write(",\n") - indentout(out, indent + 1, '"proof": {\n') - dump_json_proof(access.proof, out, indent + 2) - indentout(out, indent + 1, "}\n") - else - out:write("\n") - end - indentout(out, indent, "}") -end - -local function dump_json_log_accesses(accesses, out, indent) - local n = #accesses - for i, access in ipairs(accesses) do - dump_json_log_access(access, out, indent) - if i < n then - out:write(",\n") - else - out:write("\n") - end - end -end - -function _M.dump_json_log(log, init_mcycle, init_uarch_cycle, final_mcycle, final_uarch_cycle, out, indent) - indent = indent or 0 - indentout(out, indent, "{\n") - indentout(out, indent + 1, '"init_mcycle": %u,\n', init_mcycle) - indentout(out, indent + 1, '"init_uarch_cycle": %u,\n', 
init_uarch_cycle) - indentout(out, indent + 1, '"final_mcycle": %u,\n', final_mcycle) - indentout(out, indent + 1, '"final_uarch_cycle": %u,\n', final_uarch_cycle) - indentout(out, indent + 1, '"accesses": [\n') - dump_json_log_accesses(log.accesses, out, indent + 2) - indentout(out, indent + 1, "]") - if log.log_type.annotations then - out:write(",\n") - indentout(out, indent + 1, '"notes": [\n') - dump_json_log_notes(log.notes, out, indent + 2) - indentout(out, indent + 1, "],\n") - indentout(out, indent + 1, '"brackets": [\n') - dump_json_log_brackets(log.brackets, out, indent + 2) - indentout(out, indent + 1, "]\n") - else - out:write("\n") - end - indentout(out, indent, "}") -end - function _M.parse_number(n) if not n then return nil end local base, rest = string.match(n, "^%s*(0x%x+)%s*(.-)%s*$") @@ -224,79 +138,4 @@ function _M.parse_options(s, all, keys) return options end -local function hexhash8(hash) return string.sub(hexhash(hash), 1, 8) end - -local function accessdatastring(data, data_hash, data_log2_size, address) - local data_size = 1 << data_log2_size - if data_log2_size == 3 then - if not data then return "???(no written data)" end - if data_size < #data then - -- access data is smaller than the tree leaf size - -- the logged data is the entire tree leaf, but we only need the data that was accessed - local leaf_aligned_address = (address >> cartesi.HASH_TREE_LOG2_WORD_SIZE) - << cartesi.HASH_TREE_LOG2_WORD_SIZE - local word_offset = address - leaf_aligned_address - data = data:sub(word_offset + 1, word_offset + data_size) - end - data = string.unpack(" %s\n", - i, - notes[i] or "", - ai.address, - ai.address, - read, - written - ) - end - i = i + 1 - end - end -end - return _M diff --git a/src/clua-cartesi.cpp b/src/clua-cartesi.cpp index 45cef66a9..a6451eba3 100644 --- a/src/clua-cartesi.cpp +++ b/src/clua-cartesi.cpp @@ -33,6 +33,7 @@ extern "C" { #include "cm.h" #include "htif-constants.hpp" #include "riscv-constants.hpp" +#include 
"step-log.hpp" #include "uarch-constants.hpp" #include "uarch-pristine.hpp" @@ -213,6 +214,8 @@ CM_API int luaopen_cartesi(lua_State *L) { clua_setintegerfield(L, CM_VERSION_MINOR, "VERSION_MINOR", -1); clua_setintegerfield(L, CM_VERSION_PATCH, "VERSION_PATCH", -1); clua_setintegerfield(L, CM_HASH_SIZE, "HASH_SIZE", -1); + clua_setintegerfield(L, CM_HASH_KECCAK256, "HASH_FUNCTION_KECCAK256", -1); + clua_setintegerfield(L, CM_HASH_SHA256, "HASH_FUNCTION_SHA256", -1); clua_setintegerfield(L, CM_MCYCLE_MAX, "MCYCLE_MAX", -1); clua_setintegerfield(L, CM_UARCH_CYCLE_MAX, "UARCH_CYCLE_MAX", -1); clua_setintegerfield(L, CM_HASH_TREE_LOG2_WORD_SIZE, "HASH_TREE_LOG2_WORD_SIZE", -1); @@ -229,18 +232,19 @@ CM_API int luaopen_cartesi(lua_State *L) { clua_setintegerfield(L, CM_UARCH_BREAK_REASON_REACHED_TARGET_CYCLE, "UARCH_BREAK_REASON_REACHED_TARGET_CYCLE", -1); clua_setintegerfield(L, CM_UARCH_BREAK_REASON_UARCH_HALTED, "UARCH_BREAK_REASON_UARCH_HALTED", -1); clua_setintegerfield(L, CM_UARCH_BREAK_REASON_CYCLE_OVERFLOW, "UARCH_BREAK_REASON_CYCLE_OVERFLOW", -1); - clua_setintegerfield(L, CM_ACCESS_LOG_TYPE_ANNOTATIONS, "ACCESS_LOG_TYPE_ANNOTATIONS", -1); - clua_setintegerfield(L, CM_ACCESS_LOG_TYPE_LARGE_DATA, "ACCESS_LOG_TYPE_LARGE_DATA", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_COMMAND_AUTOMATIC, "CMIO_YIELD_COMMAND_AUTOMATIC", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_COMMAND_MANUAL, "CMIO_YIELD_COMMAND_MANUAL", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_AUTOMATIC_REASON_PROGRESS, "CMIO_YIELD_AUTOMATIC_REASON_PROGRESS", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_AUTOMATIC_REASON_TX_OUTPUT, "CMIO_YIELD_AUTOMATIC_REASON_TX_OUTPUT", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_AUTOMATIC_REASON_TX_REPORT, "CMIO_YIELD_AUTOMATIC_REASON_TX_REPORT", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_MANUAL_REASON_RX_ACCEPTED, "CMIO_YIELD_MANUAL_REASON_RX_ACCEPTED", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_MANUAL_REASON_RX_REJECTED, "CMIO_YIELD_MANUAL_REASON_RX_REJECTED", 
-1); - clua_setintegerfield(L, CM_CMIO_YIELD_MANUAL_REASON_TX_EXCEPTION, "CMIO_YIELD_MANUAL_REASON_TX_EXCEPTION", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_REASON_ADVANCE_STATE, "CMIO_YIELD_REASON_ADVANCE_STATE", -1); - clua_setintegerfield(L, CM_CMIO_YIELD_REASON_INSPECT_STATE, "CMIO_YIELD_REASON_INSPECT_STATE", -1); + clua_setintegerfield(L, CM_HTIF_DEV_HALT, "HTIF_DEV_HALT", -1); + clua_setintegerfield(L, CM_HTIF_DEV_CONSOLE, "HTIF_DEV_CONSOLE", -1); + clua_setintegerfield(L, CM_HTIF_DEV_YIELD, "HTIF_DEV_YIELD", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_CMD_AUTOMATIC, "HTIF_YIELD_CMD_AUTOMATIC", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_CMD_MANUAL, "HTIF_YIELD_CMD_MANUAL", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_AUTOMATIC_REASON_PROGRESS, "HTIF_YIELD_AUTOMATIC_REASON_PROGRESS", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_AUTOMATIC_REASON_TX_OUTPUT, "HTIF_YIELD_AUTOMATIC_REASON_TX_OUTPUT", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_AUTOMATIC_REASON_TX_REPORT, "HTIF_YIELD_AUTOMATIC_REASON_TX_REPORT", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED, "HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_MANUAL_REASON_RX_REJECTED, "HTIF_YIELD_MANUAL_REASON_RX_REJECTED", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION, "HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_REASON_ADVANCE_STATE, "HTIF_YIELD_REASON_ADVANCE_STATE", -1); + clua_setintegerfield(L, CM_HTIF_YIELD_REASON_INSPECT_STATE, "HTIF_YIELD_REASON_INSPECT_STATE", -1); clua_setintegerfield(L, CM_SHARING_NONE, "SHARING_NONE", -1); clua_setintegerfield(L, CM_SHARING_CONFIG, "SHARING_CONFIG", -1); clua_setintegerfield(L, CM_SHARING_ALL, "SHARING_ALL", -1); @@ -253,9 +257,22 @@ CM_API int luaopen_cartesi(lua_State *L) { clua_setintegerfield(L, CM_AR_RAM_START, "AR_RAM_START", -1); clua_setintegerfield(L, CM_AR_SHADOW_STATE_START, "AR_SHADOW_STATE_START", -1); clua_setintegerfield(L, 
CM_AR_SHADOW_STATE_LENGTH, "AR_SHADOW_STATE_LENGTH", -1); + clua_setintegerfield(L, CM_AR_SHADOW_UARCH_STATE_START, "AR_SHADOW_UARCH_STATE_START", -1); + clua_setintegerfield(L, CM_AR_SHADOW_UARCH_STATE_LENGTH, "AR_SHADOW_UARCH_STATE_LENGTH", -1); + clua_setintegerfield(L, CM_AR_UARCH_RAM_START, "AR_UARCH_RAM_START", -1); + clua_setintegerfield(L, CM_AR_UARCH_RAM_LENGTH, "AR_UARCH_RAM_LENGTH", -1); clua_setintegerfield(L, CM_AR_SHADOW_TLB_START, "AR_SHADOW_TLB_START", -1); clua_setintegerfield(L, CM_AR_PMAS_START, "AR_PMAS_START", -1); clua_setintegerfield(L, CM_AR_PMAS_LENGTH, "AR_PMAS_LENGTH", -1); + clua_setintegerfield(L, CM_AR_CLINT_START, "AR_CLINT_START", -1); + clua_setintegerfield(L, CM_AR_CLINT_LENGTH, "AR_CLINT_LENGTH", -1); + clua_setintegerfield(L, CM_AR_HTIF_START, "AR_HTIF_START", -1); + clua_setintegerfield(L, CM_AR_HTIF_LENGTH, "AR_HTIF_LENGTH", -1); + clua_setintegerfield(L, CM_AR_PLIC_START, "AR_PLIC_START", -1); + clua_setintegerfield(L, CM_AR_PLIC_LENGTH, "AR_PLIC_LENGTH", -1); + clua_setintegerfield(L, CM_AR_DTB_START, "AR_DTB_START", -1); + clua_setintegerfield(L, CM_AR_DTB_LENGTH, "AR_DTB_LENGTH", -1); + clua_setintegerfield(L, CM_AR_DRIVE_START, "AR_DRIVE_START", -1); // HTIF masks clua_setintegerfield(L, HTIF_HALT_CMD_HALT_MASK, "HTIF_HALT_CMD_HALT_MASK", -1); clua_setintegerfield(L, HTIF_CONSOLE_CMD_GETCHAR_MASK, "HTIF_CONSOLE_CMD_GETCHAR_MASK", -1); @@ -271,8 +288,8 @@ CM_API int luaopen_cartesi(lua_State *L) { clua_setintegerfield(L, UARCH_RAM_START_ADDRESS, "UARCH_RAM_START_ADDRESS", -1); clua_setintegerfield(L, UARCH_ECALL_FN_HALT, "UARCH_ECALL_FN_HALT", -1); clua_setintegerfield(L, UARCH_ECALL_FN_PUTCHAR, "UARCH_ECALL_FN_PUTCHAR", -1); - clua_setintegerfield(L, UARCH_ECALL_FN_MARK_DIRTY_PAGE, "UARCH_ECALL_FN_MARK_DIRTY_PAGE", -1); clua_setintegerfield(L, UARCH_ECALL_FN_WRITE_TLB, "UARCH_ECALL_FN_WRITE_TLB", -1); + clua_setlstringfield(L, STEP_LOG_SIGNATURE.data(), STEP_LOG_SIGNATURE.size(), "STEP_LOG_SIGNATURE", -1); // 
NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) clua_setlstringfield(L, reinterpret_cast(uarch_pristine_hash), uarch_pristine_hash_len, "UARCH_PRISTINE_STATE_HASH", -1); diff --git a/src/clua-i-machine.cpp b/src/clua-i-machine.cpp index f6a43512c..a1a643a7f 100644 --- a/src/clua-i-machine.cpp +++ b/src/clua-i-machine.cpp @@ -482,31 +482,6 @@ static const nlohmann::json &clua_get_machine_schema_dict(lua_State *L) try { {"root_hash", "Base64"}, {"sibling_hashes", "Base64Array"}, }}, - {"Access", - { - {"read", "Base64"}, - {"read_hash", "Base64"}, - {"written", "Base64"}, - {"written_hash", "Base64"}, - {"sibling_hashes", "Base64Array"}, - }}, - {"AccessArray", - { - {"items", "Access"}, - }}, - {"Bracket", - { - {"where", "ArrayIndex"}, - }}, - {"BracketArray", - { - {"items", "Bracket"}, - }}, - {"AccessLog", - { - {"accesses", "AccessArray"}, - {"brackets", "BracketArray"}, - }}, {"McycleRootHashes", { {"hashes", "Base64Array"}, @@ -635,6 +610,30 @@ static int machine_obj_index_get_node_hash(lua_State *L) { return 1; } +/// \brief This is the machine:read_revert_root_hash() method implementation. +/// \param L Lua state. +static int machine_obj_index_read_revert_root_hash(lua_State *L) { + auto &m = clua_check>(L, 1); + cm_hash revert_root_hash{}; + if (cm_read_revert_root_hash(m.get(), &revert_root_hash) != 0) { + return luaL_error(L, "%s", cm_get_last_error_message()); + } + clua_push_cm_hash(L, &revert_root_hash); + return 1; +} + +/// \brief This is the machine:write_revert_root_hash() method implementation. +/// \param L Lua state. +static int machine_obj_index_write_revert_root_hash(lua_State *L) { + auto &m = clua_check>(L, 1); + cm_hash revert_root_hash{}; + clua_check_cm_hash(L, 2, &revert_root_hash); + if (cm_write_revert_root_hash(m.get(), &revert_root_hash) != 0) { + return luaL_error(L, "%s", cm_get_last_error_message()); + } + return 0; +} + /// \brief This is the machine:read_reg() method implementation. /// \param L Lua state. 
static int machine_obj_index_read_reg(lua_State *L) { @@ -754,13 +753,10 @@ static int machine_obj_index_get_address_ranges(lua_State *L) { /// \param L Lua state. static int machine_obj_index_log_reset_uarch(lua_State *L) { auto &m = clua_check>(L, 1); - const int log_type = static_cast(luaL_optinteger(L, 2, 0)); - const char *log = nullptr; - if (cm_log_reset_uarch(m.get(), log_type, &log) != 0) { + if (cm_log_reset_uarch(m.get(), luaL_checkstring(L, 2)) != 0) { return luaL_error(L, "%s", cm_get_last_error_message()); } - clua_push_schemed_json_table(L, log, "AccessLog"); - return 1; + return 0; } /// \brief This is the machine:run_uarch() method implementation. @@ -780,12 +776,13 @@ static int machine_obj_index_run_uarch(lua_State *L) { /// \param L Lua state. static int machine_obj_index_log_step_uarch(lua_State *L) { auto &m = clua_check>(L, 1); - const int log_type = static_cast(luaL_optinteger(L, 2, 0)); - const char *log = nullptr; - if (cm_log_step_uarch(m.get(), log_type, &log) != 0) { + const uint64_t uarch_cycle_count = luaL_checkinteger(L, 2); + const char *log_filename = luaL_checkstring(L, 3); + cm_uarch_break_reason uarch_break_reason = CM_UARCH_BREAK_REASON_FAILED; + if (cm_log_step_uarch(m.get(), uarch_cycle_count, log_filename, &uarch_break_reason) != 0) { return luaL_error(L, "%s", cm_get_last_error_message()); } - clua_push_schemed_json_table(L, log, "AccessLog"); + lua_pushinteger(L, static_cast(uarch_break_reason)); return 1; } @@ -1023,11 +1020,13 @@ static int machine_obj_index_receive_cmio_request(lua_State *L) { /// \param L Lua state. 
static int machine_obj_index_send_cmio_response(lua_State *L) { auto &m = clua_check>(L, 1); - const auto reason = static_cast(luaL_checkinteger(L, 2)); + cm_hash revert_root_hash{}; + clua_check_cm_hash(L, 2, &revert_root_hash); + const auto reason = static_cast(luaL_checkinteger(L, 3)); size_t length{0}; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - const auto *data = reinterpret_cast(luaL_checklstring(L, 3, &length)); - if (cm_send_cmio_response(m.get(), reason, data, length) != 0) { + const auto *data = reinterpret_cast(luaL_checklstring(L, 4, &length)); + if (cm_send_cmio_response(m.get(), &revert_root_hash, reason, data, length) != 0) { return luaL_error(L, "%s", cm_get_last_error_message()); } return 0; @@ -1037,17 +1036,16 @@ static int machine_obj_index_send_cmio_response(lua_State *L) { /// \param L Lua state. static int machine_obj_index_log_send_cmio_response(lua_State *L) { auto &m = clua_check>(L, 1); - const auto reason = static_cast(luaL_checkinteger(L, 2)); - const int log_type = static_cast(luaL_optinteger(L, 4, 0)); + cm_hash revert_root_hash{}; + clua_check_cm_hash(L, 2, &revert_root_hash); + const auto reason = static_cast(luaL_checkinteger(L, 3)); size_t length{0}; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - const auto *data = reinterpret_cast(luaL_checklstring(L, 3, &length)); - const char *log = nullptr; - if (cm_log_send_cmio_response(m.get(), reason, data, length, log_type, &log) != 0) { + const auto *data = reinterpret_cast(luaL_checklstring(L, 4, &length)); + if (cm_log_send_cmio_response(m.get(), &revert_root_hash, reason, data, length, luaL_checkstring(L, 5)) != 0) { return luaL_error(L, "%s", cm_get_last_error_message()); } - clua_push_schemed_json_table(L, log, "AccessLog"); - return 1; + return 0; } /// \brief This is the machine:is_empty() method implementation. 
@@ -1086,7 +1084,13 @@ static int machine_obj_index_collect_mcycle_root_hashes(lua_State *L) { const uint64_t mcycle_end = luaL_checkinteger(L, 2); const uint64_t mcycle_period = luaL_checkinteger(L, 3); const uint64_t mcycle_phase = luaL_optinteger(L, 4, 0); - const auto log2_bundle_uarch_cycle_count = static_cast(luaL_optinteger(L, 5, 0)); + // Reject values that would not survive the narrowing to int32 instead of letting them wrap to a + // small, valid-looking bundle size. + const lua_Integer log2_bundle = luaL_optinteger(L, 5, 0); + if (log2_bundle < 0 || log2_bundle > INT32_MAX) { + return luaL_argerror(L, 5, "log2_bundle_mcycle_count is out of range"); + } + const auto log2_bundle_uarch_cycle_count = static_cast(log2_bundle); const char *previous_back_tree = !lua_isnil(L, 6) ? clua_check_schemed_json_string(L, 6, "BackMerkleTree") : nullptr; const char *result = nullptr; @@ -1101,12 +1105,20 @@ static int machine_obj_index_collect_mcycle_root_hashes(lua_State *L) { /// \brief This is the machine:collect_uarch_cycle_root_hashes() method implementation. /// \param L Lua state. static int machine_obj_index_collect_uarch_cycle_root_hashes(lua_State *L) { - lua_settop(L, 3); + lua_settop(L, 4); auto &m = clua_check>(L, 1); const uint64_t mcycle_end = luaL_checkinteger(L, 2); - const auto log2_bundle_uarch_cycle_count = static_cast(luaL_optinteger(L, 3, 0)); + // Reject values that would not survive the narrowing to int32 instead of letting them wrap to a + // small, valid-looking bundle size. + const lua_Integer log2_bundle = luaL_optinteger(L, 3, 0); + if (log2_bundle < 0 || log2_bundle > INT32_MAX) { + return luaL_argerror(L, 3, "log2_bundle_uarch_cycle_count is out of range"); + } + const auto log2_bundle_uarch_cycle_count = static_cast(log2_bundle); + const char *revert_uarch_tail = !lua_isnil(L, 4) ? 
clua_check_schemed_json_string(L, 4, "Base64Array") : nullptr; const char *result = nullptr; - if (cm_collect_uarch_cycle_root_hashes(m.get(), mcycle_end, log2_bundle_uarch_cycle_count, &result) != 0) { + if (cm_collect_uarch_cycle_root_hashes(m.get(), mcycle_end, log2_bundle_uarch_cycle_count, revert_uarch_tail, + &result) != 0) { return luaL_error(L, "%s", cm_get_last_error_message()); } clua_push_schemed_json_table(L, result, "UarchCycleRootHashes"); @@ -1172,19 +1184,30 @@ static int machine_obj_index_verify_step(lua_State *L) { /// \brief This is the machine:verify_step_uarch() method implementation. /// \param L Lua state. static int machine_obj_index_verify_step_uarch(lua_State *L) { - lua_settop(L, 4); + lua_settop(L, 5); auto &m = clua_check>(L, 1); cm_hash root_hash{}; clua_check_cm_hash(L, 2, &root_hash); - const char *log = clua_check_schemed_json_string(L, 3, "AccessLog"); + const uint64_t uarch_cycle_count = luaL_checkinteger(L, 4); cm_hash target_hash{}; - clua_check_cm_hash(L, 4, &target_hash); - if (cm_verify_step_uarch(m.get(), &root_hash, log, &target_hash) != 0) { + clua_check_cm_hash(L, 5, &target_hash); + if (cm_verify_step_uarch(m.get(), &root_hash, luaL_checkstring(L, 3), uarch_cycle_count, &target_hash) != 0) { return luaL_error(L, "%s", cm_get_last_error_message()); } return 0; } +/// \brief This is the machine:pretty_print_step_uarch() method implementation. +/// \param L Lua state. +static int machine_obj_index_pretty_print_step_uarch(lua_State *L) { + const char *printout = nullptr; + if (cm_pretty_print_step_uarch(luaL_checkstring(L, 2), &printout) != 0) { + return luaL_error(L, "%s", cm_get_last_error_message()); + } + lua_pushstring(L, printout); + return 1; +} + /// \brief This is the machine:verify_reset_uarch() method implementation. /// \param L Lua state. 
static int machine_obj_index_verify_reset_uarch(lua_State *L) { @@ -1192,10 +1215,9 @@ static int machine_obj_index_verify_reset_uarch(lua_State *L) { auto &m = clua_check>(L, 1); cm_hash root_hash{}; clua_check_cm_hash(L, 2, &root_hash); - const char *log = clua_check_schemed_json_string(L, 3, "AccessLog"); cm_hash target_hash{}; clua_check_cm_hash(L, 4, &target_hash); - if (cm_verify_reset_uarch(m.get(), &root_hash, log, &target_hash) != 0) { + if (cm_verify_reset_uarch(m.get(), &root_hash, luaL_checkstring(L, 3), &target_hash) != 0) { return luaL_error(L, "%s", cm_get_last_error_message()); } return 0; @@ -1204,18 +1226,20 @@ static int machine_obj_index_verify_reset_uarch(lua_State *L) { /// \brief This is the machine:verify_send_cmio_response() method implementation. /// \param L Lua state. static int machine_obj_index_verify_send_cmio_response(lua_State *L) { - lua_settop(L, 6); + lua_settop(L, 7); auto &m = clua_check>(L, 1); - const auto reason = static_cast(luaL_checkinteger(L, 2)); + cm_hash revert_root_hash{}; + clua_check_cm_hash(L, 2, &revert_root_hash); + const auto reason = static_cast(luaL_checkinteger(L, 3)); size_t length{0}; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - const auto *data = reinterpret_cast(luaL_checklstring(L, 3, &length)); + const auto *data = reinterpret_cast(luaL_checklstring(L, 4, &length)); cm_hash root_hash{}; - clua_check_cm_hash(L, 4, &root_hash); - const char *log = clua_check_schemed_json_string(L, 5, "AccessLog"); + clua_check_cm_hash(L, 5, &root_hash); cm_hash target_hash{}; - clua_check_cm_hash(L, 6, &target_hash); - if (cm_verify_send_cmio_response(m.get(), reason, data, length, &root_hash, log, &target_hash) != 0) { + clua_check_cm_hash(L, 7, &target_hash); + if (cm_verify_send_cmio_response(m.get(), &revert_root_hash, reason, data, length, &root_hash, + luaL_checkstring(L, 6), &target_hash) != 0) { return luaL_error(L, "%s", cm_get_last_error_message()); } return 0; @@ -1244,6 +1268,8 @@ static 
const auto machine_obj_index = cartesi::clua_make_luaL_Reg_array({ {"get_reg_address", machine_obj_index_get_reg_address}, {"get_root_hash", machine_obj_index_get_root_hash}, {"get_node_hash", machine_obj_index_get_node_hash}, + {"read_revert_root_hash", machine_obj_index_read_revert_root_hash}, + {"write_revert_root_hash", machine_obj_index_write_revert_root_hash}, {"get_runtime_config", machine_obj_index_get_runtime_config}, {"is_empty", machine_obj_index_is_empty}, {"load", machine_obj_index_load}, @@ -1273,6 +1299,7 @@ static const auto machine_obj_index = cartesi::clua_make_luaL_Reg_array({ {"verify_send_cmio_response", machine_obj_index_verify_send_cmio_response}, {"verify_step", machine_obj_index_verify_step}, {"verify_step_uarch", machine_obj_index_verify_step_uarch}, + {"pretty_print_step_uarch", machine_obj_index_pretty_print_step_uarch}, {"write_console_input", machine_obj_index_write_console_input}, {"write_memory", machine_obj_index_write_memory}, {"write_reg", machine_obj_index_write_reg}, diff --git a/src/cm.cpp b/src/cm.cpp index 0f7f59188..c90a5b043 100644 --- a/src/cm.cpp +++ b/src/cm.cpp @@ -35,7 +35,6 @@ #include #include -#include "access-log.hpp" #include "address-range-constants.hpp" #include "address-range-defines.h" #include "address-range-description.hpp" @@ -57,6 +56,7 @@ #include "os-features.hpp" #include "ranges.hpp" #include "sha-256-hasher.hpp" +#include "step-log.hpp" static std::string &get_last_err_msg_storage() { static THREAD_LOCAL std::string last_err_msg; @@ -71,17 +71,41 @@ static_assert(AR_SHADOW_REVERT_ROOT_HASH_START_DEF == CM_AR_SHADOW_REVERT_ROOT_H static_assert(AR_RAM_START_DEF == CM_AR_RAM_START); static_assert(AR_SHADOW_STATE_START_DEF == CM_AR_SHADOW_STATE_START); static_assert(AR_SHADOW_STATE_LENGTH_DEF == CM_AR_SHADOW_STATE_LENGTH); +static_assert(AR_SHADOW_UARCH_STATE_START_DEF == CM_AR_SHADOW_UARCH_STATE_START); +static_assert(AR_SHADOW_UARCH_STATE_LENGTH_DEF == CM_AR_SHADOW_UARCH_STATE_LENGTH); 
+static_assert(AR_UARCH_RAM_START_DEF == CM_AR_UARCH_RAM_START); +static_assert(AR_UARCH_RAM_LENGTH_DEF == CM_AR_UARCH_RAM_LENGTH); static_assert(AR_PMAS_START_DEF == CM_AR_PMAS_START); static_assert(AR_PMAS_LENGTH_DEF == CM_AR_PMAS_LENGTH); - -static_assert(HTIF_YIELD_AUTOMATIC_REASON_PROGRESS_DEF == CM_CMIO_YIELD_AUTOMATIC_REASON_PROGRESS); -static_assert(HTIF_YIELD_AUTOMATIC_REASON_TX_OUTPUT_DEF == CM_CMIO_YIELD_AUTOMATIC_REASON_TX_OUTPUT); -static_assert(HTIF_YIELD_AUTOMATIC_REASON_TX_REPORT_DEF == CM_CMIO_YIELD_AUTOMATIC_REASON_TX_REPORT); -static_assert(HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED_DEF == CM_CMIO_YIELD_MANUAL_REASON_RX_ACCEPTED); -static_assert(HTIF_YIELD_MANUAL_REASON_RX_REJECTED_DEF == CM_CMIO_YIELD_MANUAL_REASON_RX_REJECTED); -static_assert(HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION_DEF == CM_CMIO_YIELD_MANUAL_REASON_TX_EXCEPTION); -static_assert(HTIF_YIELD_REASON_ADVANCE_STATE_DEF == CM_CMIO_YIELD_REASON_ADVANCE_STATE); -static_assert(HTIF_YIELD_REASON_INSPECT_STATE_DEF == CM_CMIO_YIELD_REASON_INSPECT_STATE); +static_assert(AR_CLINT_START_DEF == CM_AR_CLINT_START); +static_assert(AR_CLINT_LENGTH_DEF == CM_AR_CLINT_LENGTH); +static_assert(AR_HTIF_START_DEF == CM_AR_HTIF_START); +static_assert(AR_HTIF_LENGTH_DEF == CM_AR_HTIF_LENGTH); +static_assert(AR_PLIC_START_DEF == CM_AR_PLIC_START); +static_assert(AR_PLIC_LENGTH_DEF == CM_AR_PLIC_LENGTH); +static_assert(AR_DTB_START_DEF == CM_AR_DTB_START); +static_assert(AR_DTB_LENGTH_DEF == CM_AR_DTB_LENGTH); +static_assert(AR_DRIVE_START_DEF == CM_AR_DRIVE_START); + +static_assert(cartesi::STEP_LOG_SIGNATURE.size() == CM_STEP_LOG_SIGNATURE_SIZE); +static_assert(std::string_view{cartesi::STEP_LOG_SIGNATURE.data(), cartesi::STEP_LOG_SIGNATURE.size()} == + std::string_view{CM_STEP_LOG_SIGNATURE, CM_STEP_LOG_SIGNATURE_SIZE}); + +static_assert(HTIF_YIELD_AUTOMATIC_REASON_PROGRESS_DEF == CM_HTIF_YIELD_AUTOMATIC_REASON_PROGRESS); +static_assert(HTIF_YIELD_AUTOMATIC_REASON_TX_OUTPUT_DEF == 
CM_HTIF_YIELD_AUTOMATIC_REASON_TX_OUTPUT); +static_assert(HTIF_YIELD_AUTOMATIC_REASON_TX_REPORT_DEF == CM_HTIF_YIELD_AUTOMATIC_REASON_TX_REPORT); +static_assert(HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED_DEF == CM_HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED); +static_assert(HTIF_YIELD_MANUAL_REASON_RX_REJECTED_DEF == CM_HTIF_YIELD_MANUAL_REASON_RX_REJECTED); +static_assert(HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION_DEF == CM_HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION); +static_assert(HTIF_YIELD_REASON_ADVANCE_STATE_DEF == CM_HTIF_YIELD_REASON_ADVANCE_STATE); +static_assert(HTIF_YIELD_REASON_INSPECT_STATE_DEF == CM_HTIF_YIELD_REASON_INSPECT_STATE); + +static_assert(HTIF_DEV_HALT_DEF == CM_HTIF_DEV_HALT); +static_assert(HTIF_DEV_CONSOLE_DEF == CM_HTIF_DEV_CONSOLE); +static_assert(HTIF_DEV_YIELD_DEF == CM_HTIF_DEV_YIELD); + +static_assert(static_cast(cartesi::hash_function_type::keccak256) == CM_HASH_KECCAK256); +static_assert(static_cast(cartesi::hash_function_type::sha256) == CM_HASH_SHA256); uint64_t cm_get_version() { return CM_VERSION_NUM; @@ -693,12 +717,15 @@ cm_error cm_collect_mcycle_root_hashes(cm_machine *m, uint64_t mcycle_end, uint6 } cm_error cm_collect_uarch_cycle_root_hashes(cm_machine *m, uint64_t mcycle_end, int32_t log2_bundle_uarch_cycle_count, - const char **result) try { + const char *revert_uarch_tail, const char **result) try { if (result == nullptr) { throw std::invalid_argument("invalid result output"); } auto *cpp_m = convert_from_c(m); - const auto cpp_res = cpp_m->collect_uarch_cycle_root_hashes(mcycle_end, log2_bundle_uarch_cycle_count); + const auto cpp_revert_uarch_tail = + cartesi::from_json(revert_uarch_tail, "revert_uarch_tail"); + const auto cpp_res = + cpp_m->collect_uarch_cycle_root_hashes(mcycle_end, log2_bundle_uarch_cycle_count, cpp_revert_uarch_tail); *result = cm_set_temp_string(cartesi::to_json(cpp_res).dump()); return cm_result_success(); } catch (...) 
{ @@ -716,19 +743,14 @@ cm_error cm_reset_uarch(cm_machine *m) try { return cm_result_failure(); } -cm_error cm_log_reset_uarch(cm_machine *m, int32_t log_type, const char **log) try { - if (log == nullptr) { - throw std::invalid_argument("invalid access log output"); +cm_error cm_log_reset_uarch(cm_machine *m, const char *log_filename) try { + if (log_filename == nullptr) { + throw std::invalid_argument("invalid log_filename"); } auto *cpp_m = convert_from_c(m); - const cartesi::access_log::type cpp_log_type(log_type); - const cartesi::access_log cpp_log = cpp_m->log_reset_uarch(cpp_log_type); - *log = cm_set_temp_string(cartesi::to_json(cpp_log).dump()); + cpp_m->log_reset_uarch(log_filename); return cm_result_success(); } catch (...) { - if (log != nullptr) { - *log = nullptr; - } return cm_result_failure(); } @@ -764,18 +786,20 @@ cm_error cm_log_step(cm_machine *m, uint64_t mcycle_count, const char *log_filen return cm_result_failure(); } -cm_error cm_log_step_uarch(cm_machine *m, int32_t log_type, const char **log) try { - if (log == nullptr) { - throw std::invalid_argument("invalid access log output"); +cm_error cm_log_step_uarch(cm_machine *m, uint64_t uarch_cycle_count, const char *log_filename, + cm_uarch_break_reason *uarch_break_reason) try { + if (log_filename == nullptr) { + throw std::invalid_argument("invalid log_filename"); } auto *cpp_m = convert_from_c(m); - const cartesi::access_log::type cpp_log_type(log_type); - const cartesi::access_log cpp_log = cpp_m->log_step_uarch(cpp_log_type); - *log = cm_set_temp_string(cartesi::to_json(cpp_log).dump()); + const auto reason = cpp_m->log_step_uarch(uarch_cycle_count, log_filename); + if (uarch_break_reason != nullptr) { + *uarch_break_reason = static_cast(reason); + } return cm_result_success(); } catch (...) 
{ - if (log != nullptr) { - *log = nullptr; + if (uarch_break_reason != nullptr) { + *uarch_break_reason = CM_UARCH_BREAK_REASON_FAILED; } return cm_result_failure(); } @@ -799,40 +823,52 @@ cm_error cm_verify_step(const cm_hash *root_hash_before, const char *log_filenam return cm_result_failure(); } -cm_error cm_verify_step_uarch(const cm_machine *m, const cm_hash *root_hash_before, const char *log, - const cm_hash *root_hash_after) try { - if (log == nullptr) { - throw std::invalid_argument("invalid access log"); +cm_error cm_verify_step_uarch(const cm_machine *m, const cm_hash *root_hash_before, const char *log_filename, + uint64_t uarch_cycle_count, const cm_hash *root_hash_after) try { + if (log_filename == nullptr) { + throw std::invalid_argument("invalid log_filename"); } - const auto cpp_log = // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - cartesi::from_json>(log, "log").value(); const cartesi::machine_hash cpp_root_hash_before = convert_from_c(root_hash_before); const cartesi::machine_hash cpp_root_hash_after = convert_from_c(root_hash_after); if (m != nullptr) { const auto *cpp_m = convert_from_c(m); - cpp_m->verify_step_uarch(cpp_root_hash_before, cpp_log, cpp_root_hash_after); + cpp_m->verify_step_uarch(cpp_root_hash_before, log_filename, uarch_cycle_count, cpp_root_hash_after); } else { - cartesi::machine::verify_step_uarch(cpp_root_hash_before, cpp_log, cpp_root_hash_after); + cartesi::machine::verify_step_uarch(cpp_root_hash_before, log_filename, uarch_cycle_count, cpp_root_hash_after); } return cm_result_success(); } catch (...) 
{ return cm_result_failure(); } -cm_error cm_verify_reset_uarch(const cm_machine *m, const cm_hash *root_hash_before, const char *log, +cm_error cm_pretty_print_step_uarch(const char *log_filename, const char **printout) try { + if (log_filename == nullptr) { + throw std::invalid_argument("invalid log_filename"); + } + if (printout == nullptr) { + throw std::invalid_argument("invalid printout output"); + } + *printout = cm_set_temp_string(cartesi::machine::pretty_print_step_uarch(log_filename)); + return cm_result_success(); +} catch (...) { + if (printout != nullptr) { + *printout = nullptr; + } + return cm_result_failure(); +} + +cm_error cm_verify_reset_uarch(const cm_machine *m, const cm_hash *root_hash_before, const char *log_filename, const cm_hash *root_hash_after) try { - if (log == nullptr) { - throw std::invalid_argument("invalid access log"); + if (log_filename == nullptr) { + throw std::invalid_argument("invalid log_filename"); } - const auto cpp_log = // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - cartesi::from_json>(log, "log").value(); const cartesi::machine_hash cpp_root_hash_before = convert_from_c(root_hash_before); const cartesi::machine_hash cpp_root_hash_after = convert_from_c(root_hash_after); if (m != nullptr) { const auto *cpp_m = convert_from_c(m); - cpp_m->verify_reset_uarch(cpp_root_hash_before, cpp_log, cpp_root_hash_after); + cpp_m->verify_reset_uarch(cpp_root_hash_before, log_filename, cpp_root_hash_after); } else { - cartesi::machine::verify_reset_uarch(cpp_root_hash_before, cpp_log, cpp_root_hash_after); + cartesi::machine::verify_reset_uarch(cpp_root_hash_before, log_filename, cpp_root_hash_after); } return cm_result_success(); } catch (...) 
{ @@ -888,6 +924,24 @@ cm_error cm_get_node_hash(const cm_machine *m, uint64_t address, int log2_size, return cm_result_failure(); } +cm_error cm_read_revert_root_hash(const cm_machine *m, cm_hash *hash) try { + const auto *cpp_m = convert_from_c(m); + const cartesi::machine_hash cpp_hash = cpp_m->read_revert_root_hash(); + convert_to_c(cpp_hash, hash); + return cm_result_success(); +} catch (...) { + return cm_result_failure(); +} + +cm_error cm_write_revert_root_hash(cm_machine *m, const cm_hash *hash) try { + auto *cpp_m = convert_from_c(m); + const cartesi::machine_hash cpp_hash = convert_from_c(hash); + cpp_m->write_revert_root_hash(cpp_hash); + return cm_result_success(); +} catch (...) { + return cm_result_failure(); +} + cm_error cm_verify_hash_tree(cm_machine *m, bool *result) try { if (result == nullptr) { throw std::invalid_argument("invalid result output"); @@ -1200,46 +1254,45 @@ cm_error cm_receive_cmio_request(const cm_machine *m, uint8_t *cmd, uint16_t *re return cm_result_failure(); } -cm_error cm_send_cmio_response(cm_machine *m, uint16_t reason, const uint8_t *data, uint64_t length) try { +cm_error cm_send_cmio_response(cm_machine *m, const cm_hash *revert_root_hash, uint16_t reason, const uint8_t *data, + uint64_t length) try { auto *cpp_m = convert_from_c(m); - cpp_m->send_cmio_response(reason, data, length); + const cartesi::machine_hash cpp_revert_root_hash = convert_from_c(revert_root_hash); + cpp_m->send_cmio_response(cpp_revert_root_hash, reason, data, length); return cm_result_success(); } catch (...) 
{ return cm_result_failure(); } -cm_error cm_log_send_cmio_response(cm_machine *m, uint16_t reason, const uint8_t *data, uint64_t length, - int32_t log_type, const char **log) try { - if (log == nullptr) { - throw std::invalid_argument("invalid access log output"); +cm_error cm_log_send_cmio_response(cm_machine *m, const cm_hash *revert_root_hash, uint16_t reason, const uint8_t *data, + uint64_t length, const char *log_filename) try { + if (log_filename == nullptr) { + throw std::invalid_argument("invalid log_filename"); } + const cartesi::machine_hash cpp_revert_root_hash = convert_from_c(revert_root_hash); auto *cpp_m = convert_from_c(m); - const cartesi::access_log::type cpp_log_type(log_type); - const cartesi::access_log cpp_log = cpp_m->log_send_cmio_response(reason, data, length, cpp_log_type); - *log = cm_set_temp_string(cartesi::to_json(cpp_log).dump()); + cpp_m->log_send_cmio_response(cpp_revert_root_hash, reason, data, length, log_filename); return cm_result_success(); } catch (...) 
{ - if (log != nullptr) { - *log = nullptr; - } return cm_result_failure(); } -cm_error cm_verify_send_cmio_response(const cm_machine *m, uint16_t reason, const uint8_t *data, uint64_t length, - const cm_hash *root_hash_before, const char *log, const cm_hash *root_hash_after) try { - if (log == nullptr) { - throw std::invalid_argument("invalid access log"); +cm_error cm_verify_send_cmio_response(const cm_machine *m, const cm_hash *revert_root_hash, uint16_t reason, + const uint8_t *data, uint64_t length, const cm_hash *root_hash_before, const char *log_filename, + const cm_hash *root_hash_after) try { + if (log_filename == nullptr) { + throw std::invalid_argument("invalid log_filename"); } - const auto cpp_log = // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - cartesi::from_json>(log, "log").value(); + const cartesi::machine_hash cpp_revert_root_hash = convert_from_c(revert_root_hash); const cartesi::machine_hash cpp_root_hash_before = convert_from_c(root_hash_before); const cartesi::machine_hash cpp_root_hash_after = convert_from_c(root_hash_after); if (m != nullptr) { const auto *cpp_m = convert_from_c(m); - cpp_m->verify_send_cmio_response(reason, data, length, cpp_root_hash_before, cpp_log, cpp_root_hash_after); - } else { - cartesi::machine::verify_send_cmio_response(reason, data, length, cpp_root_hash_before, cpp_log, + cpp_m->verify_send_cmio_response(cpp_revert_root_hash, reason, data, length, cpp_root_hash_before, log_filename, cpp_root_hash_after); + } else { + cartesi::machine::verify_send_cmio_response(cpp_revert_root_hash, reason, data, length, cpp_root_hash_before, + log_filename, cpp_root_hash_after); } return cm_result_success(); } catch (...) { diff --git a/src/cm.h b/src/cm.h index b8cbf68fd..685da0187 100644 --- a/src/cm.h +++ b/src/cm.h @@ -39,6 +39,11 @@ extern "C" { #define CM_MCYCLE_MAX UINT64_MAX #define CM_UARCH_CYCLE_MAX UINT64_C(1048576) +/// Binary step log signature: "CTSI" magic + version byte + 3 reserved bytes. 
+/// Must stay in sync with cartesi::STEP_LOG_SIGNATURE in step-log.hpp. +#define CM_STEP_LOG_SIGNATURE "CTSI\x03\x00\x00\x00" +#define CM_STEP_LOG_SIGNATURE_SIZE 8 + // ----------------------------------------------------------------------------- // API enums and structures // ----------------------------------------------------------------------------- @@ -61,10 +66,23 @@ typedef enum cm_pmas_constant { CM_AR_RAM_START = 0x80000000, CM_AR_SHADOW_STATE_START = 0x0, CM_AR_SHADOW_STATE_LENGTH = 0x8000, + CM_AR_SHADOW_UARCH_STATE_START = 0x400000, + CM_AR_SHADOW_UARCH_STATE_LENGTH = 0x1000, + CM_AR_UARCH_RAM_START = 0x600000, + CM_AR_UARCH_RAM_LENGTH = 0x200000, CM_AR_SHADOW_TLB_START = 0x1000, CM_AR_SHADOW_TLB_LENGTH = 0x6000, CM_AR_PMAS_START = 0x10000, CM_AR_PMAS_LENGTH = 0x1000, + CM_AR_CLINT_START = 0x2000000, + CM_AR_CLINT_LENGTH = 0xC0000, + CM_AR_HTIF_START = 0x40008000, + CM_AR_HTIF_LENGTH = 0x1000, + CM_AR_PLIC_START = 0x40100000, + CM_AR_PLIC_LENGTH = 0x400000, + CM_AR_DTB_START = 0x7ff00000, + CM_AR_DTB_LENGTH = 0x100000, + CM_AR_DRIVE_START = 0x80000000000000, ///< Base of flash drive PMAs (open-ended) } cm_pmas_constant; /// \brief Error codes returned from the C API. @@ -115,28 +133,29 @@ typedef enum cm_uarch_break_reason { CM_UARCH_BREAK_REASON_FAILED, } cm_uarch_break_reason; -/// \brief Access log types. -typedef enum cm_access_log_type { - CM_ACCESS_LOG_TYPE_ANNOTATIONS = 1, ///< Includes annotations - CM_ACCESS_LOG_TYPE_LARGE_DATA = 2, ///< Includes data larger than 8 bytes -} cm_access_log_type; +/// \brief HTIF device identifiers (DEV field of tohost/fromhost). +typedef enum cm_htif_device { + CM_HTIF_DEV_HALT = 0, ///< Halts the machine + CM_HTIF_DEV_CONSOLE = 1, ///< Console input and output + CM_HTIF_DEV_YIELD = 2, ///< Yield control back to the host +} cm_htif_device; /// \brief Yield device commands. 
typedef enum cm_cmio_yield_command { - CM_CMIO_YIELD_COMMAND_AUTOMATIC, - CM_CMIO_YIELD_COMMAND_MANUAL, + CM_HTIF_YIELD_CMD_AUTOMATIC, + CM_HTIF_YIELD_CMD_MANUAL, } cm_cmio_yield_command; /// \brief Yield reasons. typedef enum cm_cmio_yield_reason { - CM_CMIO_YIELD_AUTOMATIC_REASON_PROGRESS = 1, ///< Progress is available - CM_CMIO_YIELD_AUTOMATIC_REASON_TX_OUTPUT = 2, ///< Output is available in tx buffer - CM_CMIO_YIELD_AUTOMATIC_REASON_TX_REPORT = 4, ///< Report is available in tx buffer - CM_CMIO_YIELD_MANUAL_REASON_RX_ACCEPTED = 1, ///< Input in rx buffer was accepted - CM_CMIO_YIELD_MANUAL_REASON_RX_REJECTED = 2, ///< Input in rx buffer was rejected - CM_CMIO_YIELD_MANUAL_REASON_TX_EXCEPTION = 4, ///< Exception happened - CM_CMIO_YIELD_REASON_ADVANCE_STATE = 0, ///< Input in rx buffer is an advance state - CM_CMIO_YIELD_REASON_INSPECT_STATE = 1, ///< Input in rx buffer is an inspect state + CM_HTIF_YIELD_AUTOMATIC_REASON_PROGRESS = 1, ///< Progress is available + CM_HTIF_YIELD_AUTOMATIC_REASON_TX_OUTPUT = 2, ///< Output is available in tx buffer + CM_HTIF_YIELD_AUTOMATIC_REASON_TX_REPORT = 4, ///< Report is available in tx buffer + CM_HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED = 1, ///< Input in rx buffer was accepted + CM_HTIF_YIELD_MANUAL_REASON_RX_REJECTED = 2, ///< Input in rx buffer was rejected + CM_HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION = 4, ///< Exception happened + CM_HTIF_YIELD_REASON_ADVANCE_STATE = 0, ///< Input in rx buffer is an advance state + CM_HTIF_YIELD_REASON_INSPECT_STATE = 1, ///< Input in rx buffer is an inspect state } cm_cmio_yield_reason; /// \brief Sharing modes. @@ -566,6 +585,18 @@ CM_API cm_error cm_get_root_hash(const cm_machine *m, cm_hash *hash); /// \returns 0 for success, non zero code for error. CM_API cm_error cm_get_node_hash(const cm_machine *m, uint64_t address, int32_t log2_size, cm_hash *hash); +/// \brief Reads the revert root hash from the shadow state. 
+/// \param m Pointer to a non-empty machine object (holds a machine instance). +/// \param hash Valid pointer to cm_hash structure that receives the hash. +/// \returns 0 for success, non zero code for error. +CM_API cm_error cm_read_revert_root_hash(const cm_machine *m, cm_hash *hash); + +/// \brief Writes the revert root hash to the shadow state. +/// \param m Pointer to a non-empty machine object (holds a machine instance). +/// \param hash Revert root hash to store. +/// \returns 0 for success, non zero code for error. +CM_API cm_error cm_write_revert_root_hash(cm_machine *m, const cm_hash *hash); + /// \brief Obtains the proof for a node in the machine state hash tree. /// \param m Pointer to a non-empty machine object (holds a machine instance). /// \param address Address of target node. Must be aligned to a 2^log2_target_size boundary. @@ -793,7 +824,7 @@ CM_API cm_error cm_run_uarch(cm_machine *m, uint64_t uarch_cycle_end, cm_uarch_b /// and collect the resulting root hashes as well. As a result, all root hashes collected after the next-to-last /// reset index correspond to this fixed point. CM_API cm_error cm_collect_uarch_cycle_root_hashes(cm_machine *m, uint64_t mcycle_end, - int32_t log2_bundle_uarch_cycle_count, const char **result); + int32_t log2_bundle_uarch_cycle_count, const char *revert_uarch_tail, const char **result); /// \brief Resets the entire microarchitecture state to pristine values. /// \param m Pointer to a non-empty machine object (holds a machine instance). @@ -818,13 +849,16 @@ CM_API cm_error cm_receive_cmio_request(const cm_machine *m, uint8_t *cmd, uint1 /// \brief Sends a cmio response. /// \param m Pointer to a non-empty machine object (holds a machine instance). +/// \param revert_root_hash Root hash to store in the revert-root-hash shadow slot; the state to revert +/// to if the input delivered by this response is rejected. /// \param reason Reason for sending the response. /// \param data Response data to send. 
/// \param length Length of response data. /// \returns 0 for success, non zero code for error. /// \details This method should only be called as a response to cmio requests with manual yield command, /// where the reason is either accepted or a GIO request, may fail otherwise. -CM_API cm_error cm_send_cmio_response(cm_machine *m, uint16_t reason, const uint8_t *data, uint64_t length); +CM_API cm_error cm_send_cmio_response(cm_machine *m, const cm_hash *revert_root_hash, uint16_t reason, + const uint8_t *data, uint64_t length); // ------------------------------------ // Logging @@ -839,33 +873,32 @@ CM_API cm_error cm_send_cmio_response(cm_machine *m, uint16_t reason, const uint CM_API cm_error cm_log_step(cm_machine *m, uint64_t mcycle_count, const char *log_filename, cm_break_reason *break_reason); -/// \brief Runs the machine in the microarchitecture for one micro cycle logging all accesses to the state. +/// \brief Runs the microarchitecture for the given cycle count (or halt) writing a binary step log to a file. /// \param m Pointer to a non-empty machine object (holds a machine instance). -/// \param log_type Type of access log to generate. -/// \param log Receives the state access log as a JSON object in a string, -/// guaranteed to remain valid only until the next CM_API function is called from the same thread. +/// \param uarch_cycle_count Number of uarch cycles to advance; the run stops earlier on halt or overflow. +/// \param log_filename Path where the binary step log will be saved. +/// \param uarch_break_reason Receives the reason the step ended (can be NULL). /// \returns 0 for success, non zero code for error. -CM_API cm_error cm_log_step_uarch(cm_machine *m, int32_t log_type, const char **log); +CM_API cm_error cm_log_step_uarch(cm_machine *m, uint64_t uarch_cycle_count, const char *log_filename, + cm_uarch_break_reason *uarch_break_reason); -/// \brief Resets the entire microarchitecture state to pristine values logging all accesses to the state. 
+/// \brief Resets the entire microarchitecture state to pristine values writing a binary step log to a file. /// \param m Pointer to a non-empty machine object (holds a machine instance). -/// \param log_type Type of access log to generate. -/// \param log Receives the state access log as a JSON object in a string, -/// guaranteed to remain valid only until the next CM_API function is called from the same thread. +/// \param log_filename Path where the binary step log will be saved. /// \returns 0 for success, non zero code for error. -CM_API cm_error cm_log_reset_uarch(cm_machine *m, int32_t log_type, const char **log); +CM_API cm_error cm_log_reset_uarch(cm_machine *m, const char *log_filename); -/// \brief Sends a cmio response logging all accesses to the state. +/// \brief Sends a cmio response and writes a binary step log to a file. /// \param m Pointer to a non-empty machine object (holds a machine instance). +/// \param revert_root_hash Root hash stored in the revert-root-hash shadow slot, to revert to if the +/// input delivered by this response is rejected. /// \param reason Reason for sending the response. /// \param data Response data to send. /// \param length Length of response data. -/// \param log_type Type of access log to generate. -/// \param log Receives the state access log as a JSON object in a string, -/// guaranteed to remain valid only until the next CM_API function is called from the same thread. +/// \param log_filename Path where the binary step log will be saved. /// \returns 0 for success, non zero code for error. 
-CM_API cm_error cm_log_send_cmio_response(cm_machine *m, uint16_t reason, const uint8_t *data, uint64_t length, - int32_t log_type, const char **log); +CM_API cm_error cm_log_send_cmio_response(cm_machine *m, const cm_hash *revert_root_hash, uint16_t reason, + const uint8_t *data, uint64_t length, const char *log_filename); // ------------------------------------ // Verifying @@ -884,32 +917,43 @@ CM_API cm_error cm_verify_step(const cm_hash *root_hash_before, const char *log_ /// \brief Checks the validity of a state transition produced by cm_log_step_uarch. /// \param m Pointer to a machine object. Can be NULL (for local machines). /// \param root_hash_before State hash before step. -/// \param log State access log to be verified as a JSON object in a string. +/// \param log_filename Path to the binary step log file to be verified. +/// \param uarch_cycle_count Number of uarch cycles the caller expects to have been advanced. /// \param root_hash_after State hash after step. /// \returns 0 for success, non zero code for error. -CM_API cm_error cm_verify_step_uarch(const cm_machine *m, const cm_hash *root_hash_before, const char *log, - const cm_hash *root_hash_after); +CM_API cm_error cm_verify_step_uarch(const cm_machine *m, const cm_hash *root_hash_before, const char *log_filename, + uint64_t uarch_cycle_count, const cm_hash *root_hash_after); + +/// \brief Replays a uarch step log and returns a human-readable printout. +/// \param log_filename Path to a binary step log file produced by cm_log_step_uarch. +/// \param printout Receives the printout text, guaranteed to remain valid only until the next +/// CM_API function is called from the same thread. Set to NULL on failure. +/// \returns 0 for success, non zero code for error. +/// \details Replays the log purely to produce the printout; no caller belief is checked. 
+CM_API cm_error cm_pretty_print_step_uarch(const char *log_filename, const char **printout); /// \brief Checks the validity of a state transition produced by cm_log_reset_uarch. /// \param m Pointer to a machine object. Can be NULL (for local machines). /// \param root_hash_before State hash before reset. -/// \param log State access log to be verified as a JSON object in a string. +/// \param log_filename Path to the binary step log file to be verified. /// \param root_hash_after State hash after reset. /// \returns 0 for success, non zero code for error. -CM_API cm_error cm_verify_reset_uarch(const cm_machine *m, const cm_hash *root_hash_before, const char *log, +CM_API cm_error cm_verify_reset_uarch(const cm_machine *m, const cm_hash *root_hash_before, const char *log_filename, const cm_hash *root_hash_after); /// \brief Checks the validity of a state transition produced by cm_log_send_cmio_response. /// \param m Pointer to a machine object. Can be NULL (for local machines). +/// \param revert_root_hash Root hash that was stored in the revert-root-hash shadow slot. /// \param reason Reason for sending the response. /// \param data The response sent when the log was generated. /// \param length Length of response. /// \param root_hash_before State hash before response. -/// \param log State access log to be verified as a JSON object in a string. +/// \param log_filename Path to the binary step log file to be verified. /// \param root_hash_after State hash after response. /// \returns 0 for success, non zero code for error. 
-CM_API cm_error cm_verify_send_cmio_response(const cm_machine *m, uint16_t reason, const uint8_t *data, uint64_t length, - const cm_hash *root_hash_before, const char *log, const cm_hash *root_hash_after); +CM_API cm_error cm_verify_send_cmio_response(const cm_machine *m, const cm_hash *revert_root_hash, uint16_t reason, + const uint8_t *data, uint64_t length, const cm_hash *root_hash_before, const char *log_filename, + const cm_hash *root_hash_after); // ------------------------------------ // Integrity checking diff --git a/src/collect-mcycle-hashes-state-access.hpp b/src/collect-mcycle-hashes-state-access.hpp index a4f546709..c928bc459 100644 --- a/src/collect-mcycle-hashes-state-access.hpp +++ b/src/collect-mcycle-hashes-state-access.hpp @@ -496,8 +496,6 @@ class collect_mcycle_hashes_state_access : return m_m.get_host_addr(paddr, pma_index); } - void do_mark_dirty_page(host_addr /* haddr */, uint64_t /* pma_index */) const {} - bool do_putchar(uint8_t c) const { // NOLINT(readability-convert-member-functions-to-static) return m_m.putchar(c); } diff --git a/src/collect-uarch-cycle-hashes-state-access.hpp b/src/collect-uarch-cycle-hashes-state-access.hpp index c418259c0..34e9bac67 100644 --- a/src/collect-uarch-cycle-hashes-state-access.hpp +++ b/src/collect-uarch-cycle-hashes-state-access.hpp @@ -122,12 +122,6 @@ class collect_uarch_cycle_hashes_state_access : return false; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - void do_mark_dirty_page(uint64_t paddr, uint64_t pma_index) const { - (void) paddr; - (void) pma_index; - } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) constexpr const char *do_get_name() const { return "collect_uarch_cycle_hashes_state_access"; diff --git a/src/i-accept-dirty-pages.hpp b/src/i-accept-dirty-pages.hpp new file mode 100644 index 000000000..09ed14323 --- /dev/null +++ b/src/i-accept-dirty-pages.hpp @@ -0,0 +1,76 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// 
SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef I_ACCEPT_DIRTY_PAGES_HPP +#define I_ACCEPT_DIRTY_PAGES_HPP + +/// \file +/// \brief Accept dirty pages interface + +#include +#include + +#include "meta.hpp" + +namespace cartesi { + +/// \class i_accept_dirty_pages +/// \brief Interface for state access classes that must mark pages dirty explicitly. +/// \tparam DERIVED Derived class implementing the interface. (An example of CRTP.) +/// \details Only a state access with a deferred store needs this. The native +/// state_access writes straight through the host pointer in do_write_memory_word +/// without marking the page, so it relies on this explicit call (and the eviction +/// marking in the machine) to keep the dirty page tree complete; record_step_state_access +/// records that same deferral. Every other context marks or hashes each page at the +/// moment of the write: native write_word marks dirty, and the microarchitecture +/// record path hashes on the spot. This is why the microarchitecture does not accept +/// dirty pages. If a batched microarchitecture write path were ever added, that +/// assumption would no longer hold and this reasoning would need to be revisited. 
+template +class i_accept_dirty_pages { // CRTP + i_accept_dirty_pages() = default; + friend DERIVED; + + /// \brief Returns object cast as the derived class + DERIVED &derived() { + return *static_cast(this); + } + + /// \brief Returns object cast as the derived class + const DERIVED &derived() const { + return *static_cast(this); + } + +public: + /// \brief Marks a page as dirty + /// \param paddr Target physical address within page + /// \param pma_index Index of PMA where page falls + void mark_dirty_page(uint64_t paddr, uint64_t pma_index) const { + derived().do_mark_dirty_page(paddr, pma_index); + } +}; + +/// \brief SFINAE test implementation of the i_accept_dirty_pages interface +template +using is_an_i_accept_dirty_pages = + std::integral_constant>>; + +template +constexpr bool is_an_i_accept_dirty_pages_v = is_an_i_accept_dirty_pages::value; + +} // namespace cartesi + +#endif diff --git a/src/i-machine.hpp b/src/i-machine.hpp index 160a7e1b6..3932f99ab 100644 --- a/src/i-machine.hpp +++ b/src/i-machine.hpp @@ -21,14 +21,13 @@ #include #include -#include "access-log.hpp" #include "address-range-description.hpp" #include "back-merkle-tree.hpp" #include "hash-tree-constants.hpp" #include "hash-tree-proof.hpp" #include "hash-tree-stats.hpp" #include "interpret.hpp" -#include "machine-config-fwd.hpp" +#include "machine-config.hpp" #include "machine-hash.hpp" #include "machine-reg.hpp" #include "machine-runtime-config.hpp" @@ -119,9 +118,9 @@ class i_machine { return do_log_step(mcycle_count, filename); } - /// \brief Runs the machine for one micro cycle logging all accesses to the state. - access_log log_step_uarch(const access_log::type &log_type) { - return do_log_step_uarch(log_type); + /// \brief Runs the uarch for the given cycle count (or halt) and writes a binary step log. 
+ uarch_interpreter_break_reason log_step_uarch(uint64_t uarch_cycle_count, const std::string &filename) { + return do_log_step_uarch(uarch_cycle_count, filename); } /// \brief Obtains the proof for a node in the hash tree. @@ -143,6 +142,16 @@ class i_machine { return do_get_node_hash(address, log2_size); } + /// \brief Reads the revert root hash from the shadow state. + machine_hash read_revert_root_hash() const { + return do_read_revert_root_hash(); + } + + /// \brief Writes the revert root hash to the shadow state. + void write_revert_root_hash(const machine_hash &hash) { + do_write_revert_root_hash(hash); + } + /// \brief Verifies integrity of hash tree. bool verify_hash_tree() const { return do_verify_hash_tree(); @@ -238,11 +247,9 @@ class i_machine { do_reset_uarch(); } - /// \brief Resets the microarchitecture state to pristine value and returns an access log - /// \param log_type Type of access log to generate. - /// \returns The state access log. - access_log log_reset_uarch(const access_log::type &log_type) { - return do_log_reset_uarch(log_type); + /// \brief Resets the microarchitecture state to pristine value and writes a step log to \p filename. + void log_reset_uarch(const std::string &filename) { + do_log_reset_uarch(filename); } /// \brief Runs the microarchitecture until the machine advances to the next mcycle or the current micro cycle @@ -253,9 +260,12 @@ class i_machine { /// \brief Collects the root hashes after every \p uarch_cycle until \p mcycle_end machine cycle, implicitly /// resetting the uarch between mcycles. - uarch_cycle_root_hashes collect_uarch_cycle_root_hashes(uint64_t mcycle_end, - int32_t log2_bundle_uarch_cycle_count) { - return do_collect_uarch_cycle_root_hashes(mcycle_end, log2_bundle_uarch_cycle_count); + /// \details The \p revert_uarch_tail holds the root hashes after each uarch cycle of the period of the + /// machine the recorded revert root hash reverts to, the last entry being the revert root hash itself. 
+ /// It is required unless the machine starts at a fixed point other than a rejected manual yield. + uarch_cycle_root_hashes collect_uarch_cycle_root_hashes(uint64_t mcycle_end, int32_t log2_bundle_uarch_cycle_count, + const machine_hashes &revert_uarch_tail = {}) { + return do_collect_uarch_cycle_root_hashes(mcycle_end, log2_bundle_uarch_cycle_count, revert_uarch_tail); } /// \brief Returns a list of descriptions for all PMA entries registered in the machine, sorted by start @@ -264,14 +274,15 @@ class i_machine { } /// \brief Sends cmio response. - void send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length) { - do_send_cmio_response(reason, data, length); + void send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length) { + do_send_cmio_response(revert_root_hash, reason, data, length); } /// \brief Sends cmio response and returns an access log - access_log log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const access_log::type &log_type) { - return do_log_send_cmio_response(reason, data, length, log_type); + void log_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length, const std::string &filename) { + do_log_send_cmio_response(revert_root_hash, reason, data, length, filename); } /// \brief Gets the address of any register @@ -291,21 +302,23 @@ class i_machine { } /// \brief Checks the validity of a state transition caused by log_step_uarch. 
- void verify_step_uarch(const machine_hash &root_hash_before, const access_log &log, - const machine_hash &root_hash_after) const { - do_verify_step_uarch(root_hash_before, log, root_hash_after); + void verify_step_uarch(const machine_hash &root_hash_before, const std::string &filename, + uint64_t uarch_cycle_count, const machine_hash &root_hash_after) const { + do_verify_step_uarch(root_hash_before, filename, uarch_cycle_count, root_hash_after); } /// \brief Checks the validity of a state transition caused by log_reset_uarch. - void verify_reset_uarch(const machine_hash &root_hash_before, const access_log &log, + void verify_reset_uarch(const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const { - do_verify_reset_uarch(root_hash_before, log, root_hash_after); + do_verify_reset_uarch(root_hash_before, filename, root_hash_after); } /// \brief Checks the validity of state transitions caused by log_send_cmio_response. - void verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const machine_hash &root_hash_before, const access_log &log, const machine_hash &root_hash_after) const { - do_verify_send_cmio_response(reason, data, length, root_hash_before, log, root_hash_after); + void verify_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length, const machine_hash &root_hash_before, const std::string &filename, + const machine_hash &root_hash_after) const { + do_verify_send_cmio_response(revert_root_hash, reason, data, length, root_hash_before, filename, + root_hash_after); } /// \brief Checks if implementation is jsorpc-machine @@ -327,10 +340,13 @@ class i_machine { virtual void do_clone_stored(const std::string &from_dir, const std::string &to_dir) const = 0; virtual void do_remove_stored(const std::string &dir) const = 0; virtual interpreter_break_reason do_log_step(uint64_t mcycle_count, const std::string &filename) 
= 0; - virtual access_log do_log_step_uarch(const access_log::type &log_type) = 0; + virtual uarch_interpreter_break_reason do_log_step_uarch(uint64_t uarch_cycle_count, + const std::string &filename) = 0; virtual hash_tree_proof do_get_proof(uint64_t address, int log2_target_size, int log2_root_size) const = 0; virtual machine_hash do_get_root_hash() const = 0; virtual machine_hash do_get_node_hash(uint64_t address, int log2_size) const = 0; + virtual machine_hash do_read_revert_root_hash() const = 0; + virtual void do_write_revert_root_hash(const machine_hash &hash) = 0; virtual uint64_t do_read_reg(reg r) const = 0; virtual void do_write_reg(reg w, uint64_t val) = 0; virtual void do_read_memory(uint64_t address, unsigned char *data, uint64_t length) const = 0; @@ -349,24 +365,26 @@ class i_machine { virtual void do_set_runtime_config(const machine_runtime_config &r) = 0; virtual void do_destroy() = 0; virtual void do_reset_uarch() = 0; - virtual access_log do_log_reset_uarch(const access_log::type &log_type) = 0; + virtual void do_log_reset_uarch(const std::string &filename) = 0; virtual uarch_interpreter_break_reason do_run_uarch(uint64_t uarch_cycle_end) = 0; virtual uarch_cycle_root_hashes do_collect_uarch_cycle_root_hashes(uint64_t mcycle_end, - int32_t log2_bundle_uarch_cycle_count) = 0; + int32_t log2_bundle_uarch_cycle_count, const machine_hashes &revert_uarch_tail) = 0; virtual address_range_descriptions do_get_address_ranges() const = 0; - virtual void do_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length) = 0; - virtual access_log do_log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const access_log::type &log_type) = 0; + virtual void do_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length) = 0; + virtual void do_log_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, + const unsigned char *data, 
uint64_t length, const std::string &filename) = 0; virtual uint64_t do_get_reg_address(reg r) const = 0; virtual machine_config do_get_default_config() const = 0; virtual interpreter_break_reason do_verify_step(const machine_hash &root_hash_before, const std::string &log_filename, uint64_t mcycle_count, const machine_hash &root_hash_after) const = 0; - virtual void do_verify_step_uarch(const machine_hash &root_hash_before, const access_log &log, + virtual void do_verify_step_uarch(const machine_hash &root_hash_before, const std::string &filename, + uint64_t uarch_cycle_count, const machine_hash &root_hash_after) const = 0; + virtual void do_verify_reset_uarch(const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const = 0; - virtual void do_verify_reset_uarch(const machine_hash &root_hash_before, const access_log &log, + virtual void do_verify_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length, const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const = 0; - virtual void do_verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const machine_hash &root_hash_before, const access_log &log, const machine_hash &root_hash_after) const = 0; virtual bool do_verify_hash_tree() const = 0; virtual bool do_is_jsonrpc_machine() const { return false; diff --git a/src/i-state-access.hpp b/src/i-state-access.hpp index 104564356..2584900a9 100644 --- a/src/i-state-access.hpp +++ b/src/i-state-access.hpp @@ -28,6 +28,7 @@ #include "address-range.hpp" #include "assert-printf.hpp" #include "i-prefer-shadow-state.hpp" +#include "machine-hash.hpp" #include "meta.hpp" #include "poor-type-name.hpp" #include "riscv-warl.hpp" @@ -36,9 +37,6 @@ namespace cartesi { -// Forward declarations -enum class bracket_type; - // Type trait that should return the fast_addr type for a state access 
class template struct i_state_access_fast_addr {}; @@ -344,16 +342,22 @@ class i_state_access { // CRTP return derived().do_write_memory(paddr, data, length); } - /// \brief Write a data buffer to memory padded with 0 - /// \param paddr Destination physical address. - /// \param data Pointer to source data buffer. - /// \param data_length Length of data buffer. - /// \param write_length_log2_size Log2 size of the total write length. + /// \brief Writes \p data to memory at \p paddr, zero-padding to a power of 2 total length. + /// \param paddr Destination physical address; must be aligned to (1 << write_length_log2_size). + /// \param data Pointer to the source bytes. + /// \param data_length Number of valid bytes at \p data; must not exceed (1 << write_length_log2_size). + /// \param write_length_log2_size Log2 of the total padded write length. void write_memory_with_padding(uint64_t paddr, const unsigned char *data, uint64_t data_length, int write_length_log2_size) const { derived().do_write_memory_with_padding(paddr, data, data_length, write_length_log2_size); } + /// \brief Reads the revert root hash from the shadow state. + /// \returns The hash. + machine_hash read_revert_root_hash() const { + return derived().do_read_revert_root_hash(); + } + /// \brief Reads a word from memory. /// \tparam T Type of word to read, potentially unaligned. /// \tparam A Type to which \p paddr and \p faddr are known to be aligned. @@ -465,18 +469,6 @@ class i_state_access { // CRTP return val; } - /// \brief Marks a page as dirty - /// \param faddr Implementation-defined fast address. - /// \param pma_index Index of PMA where page falls - /// \details When there is a host machine, the hash tree only updates the hashes for pages that - /// have been modified. Pages can only be written to if they appear in the write TLB. Therefore, - /// the hash tree only considers the pages that are currently in the write TLB and those that - /// have been marked dirty. 
When a page leaves the write TLB, it is marked dirty. - /// If the state belongs to a host machine, then this call MUST be forwarded to machine::mark_dirty_page(); - void mark_dirty_page(fast_addr faddr, uint64_t pma_index) const { - derived().do_mark_dirty_page(faddr, pma_index); - } - /// \brief Writes a character to the console /// \param c Character to output /// \returns True if console output should be flushed. diff --git a/src/i-uarch-state-access.hpp b/src/i-uarch-state-access.hpp index b22955b9e..2bc7b301b 100644 --- a/src/i-uarch-state-access.hpp +++ b/src/i-uarch-state-access.hpp @@ -147,12 +147,16 @@ class i_uarch_state_access { // CRTP return derived().do_reset_uarch(); } - bool putchar(uint8_t c) const { - return derived().do_putchar(c); + /// \brief Reverts the canonical machine state to the one whose root hash is recorded + /// in the revert root hash leaf of the shadow state. + /// \details Implementations that track a root hash read the leaf and replace it. The + /// others do nothing, the physical machine state does not revert. 
+ void revert_state() const { + derived().do_revert_state(); } - void mark_dirty_page(uint64_t paddr, uint64_t pma_index) const { - return derived().do_mark_dirty_page(paddr, pma_index); + bool putchar(uint8_t c) const { + return derived().do_putchar(c); } void write_tlb(TLB_set_index set_index, uint64_t slot_index, uint64_t vaddr_page, uint64_t vp_offset, diff --git a/src/interpret.cpp b/src/interpret.cpp index ecbcfc074..a39a18c1e 100644 --- a/src/interpret.cpp +++ b/src/interpret.cpp @@ -860,22 +860,9 @@ static FORCE_INLINE int32_t insn_get_C_SWSP_imm(uint32_t insn) { /// \param slot_index Slot index template static void flush_tlb_slot(const STATE_ACCESS a, uint64_t slot_index) { - // Make sure a valid page leaving the write TLB is marked as dirty - // We must do this BEFORE we modify the TLB entries themselves - // (Otherwise, we could stop uarch before it marks the page dirty but after - // the entry is no longer in the TLB, which would cause the hash tree to - // miss a dirty page.) - if constexpr (SET == TLB_WRITE) { - auto old_vaddr_page = a.template read_tlb_vaddr_page(slot_index); - if (old_vaddr_page == TLB_UNVERIFIED_PAGE) { - old_vaddr_page = a.template init_hot_tlb_slot(slot_index); - } - if (old_vaddr_page != TLB_INVALID_PAGE) { - auto old_pma_index = a.template read_tlb_pma_index(slot_index); - const auto old_faddr_page = old_vaddr_page + a.template read_tlb_vf_offset(slot_index); - a.mark_dirty_page(old_faddr_page, old_pma_index); - } - } + // A valid page leaving the write TLB is marked dirty by the machine itself, + // inside write_verified_tlb/write_unverified_tlb, before the outgoing slot is + // overwritten. The interpreter no longer marks it here. 
// We do not leave garbage behind in empty slots // (It would make state access classes trickier to implement) const auto vaddr_page = TLB_INVALID_PAGE; diff --git a/src/json-util.cpp b/src/json-util.cpp index 284964557..fd07dfe75 100644 --- a/src/json-util.cpp +++ b/src/json-util.cpp @@ -33,11 +33,9 @@ #include -#include "access-log.hpp" #include "address-range-description.hpp" #include "back-merkle-tree.hpp" #include "base64.hpp" -#include "bracket-note.hpp" #include "hash-tree-constants.hpp" #include "hash-tree-proof.hpp" #include "hash-tree-stats.hpp" @@ -644,36 +642,6 @@ static std::string hash_function_to_name(hash_function_type hf) { throw std::domain_error{"invalid hash function type"}; } -static std::string access_type_to_name(access_type at) { - switch (at) { - case access_type::read: - return "read"; - case access_type::write: - return "write"; - } - throw std::domain_error{"invalid access type"}; -} - -static std::string bracket_type_to_name(bracket_type bt) { - switch (bt) { - case bracket_type::begin: - return "begin"; - case bracket_type::end: - return "end"; - } - throw std::domain_error{"invalid bracket type"}; -} - -static bracket_type bracket_type_from_name(const std::string &name) { - if (name == "begin") { - return bracket_type::begin; - } - if (name == "end") { - return bracket_type::end; - } - throw std::domain_error{"invalid bracket type"}; -} - static std::string console_output_destination_to_name(console_output_destination dest) { switch (dest) { case console_output_destination::to_null: @@ -1158,6 +1126,17 @@ template void ju_get_opt_field(const nlohmann::json &j, const uint64_t template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, machine_hash &value, const std::string &path); +template +void ju_get_opt_field(const nlohmann::json &j, const K &key, machine_hashes &value, const std::string &path) { + ju_get_opt_vector_like_field(j, key, value, path); +} + +template void ju_get_opt_field(const nlohmann::json &j, 
const uint64_t &key, machine_hashes &value, + const std::string &path); + +template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, machine_hashes &value, + const std::string &path); + template void ju_get_opt_field(const nlohmann::json &j, const K &key, not_default_constructible &value, const std::string &path) { @@ -1259,228 +1238,6 @@ template void ju_get_opt_field(const nlohmann::json &j, const uint64_t template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, hash_tree_stats &value, const std::string &path); -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, access_type &value, const std::string &path) { - if (!contains(j, key, path)) { - return; - } - const auto &jk = j[key]; - if (!jk.is_string()) { - throw std::invalid_argument("\""s + path + to_string(key) + "\" not a string"); - } - const auto &v = jk.template get(); - if (v == "read") { - value = access_type::read; - return; - } - if (v == "write") { - value = access_type::write; - return; - } - throw std::invalid_argument("\""s + path + to_string(key) + "\" not an access type"); -} - -template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, access_type &value, - const std::string &path); - -template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, access_type &value, - const std::string &path); - -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, access_data &data, const std::string &path) { - data.clear(); - if (!contains(j, key, path)) { - return; - } - const auto &jk = j[key]; - if (!jk.is_string()) { - throw std::invalid_argument("\""s + path + to_string(key) + "\" not a string"); - } - const auto &bin = decode_base64(jk.template get()); - std::copy(bin.begin(), bin.end(), std::back_inserter(data)); -} - -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, std::optional &optional, - const std::string &path) { - optional = {}; - if (!contains(j, key, 
path)) { - return; - } - const auto &jk = j[key]; - if (!jk.is_string()) { - throw std::invalid_argument("\""s + path + to_string(key) + "\" not a string"); - } - const auto &bin = decode_base64(jk.template get()); - optional.emplace(); - std::copy(bin.begin(), bin.end(), std::back_inserter(optional.value())); -} - -template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, access_data &value, - const std::string &path); - -template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, access_data &value, - const std::string &path); - -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, access &access, const std::string &path) { - if (!contains(j, key, path)) { - return; - } - const auto &jk = j[key]; - if (!jk.is_object()) { - throw std::invalid_argument("\""s + path + to_string(key) + "\" not an object"); - } - const auto new_path = path + to_string(key) + "/"; - access_type type = access_type::read; - ju_get_field(jk, "type"s, type, new_path); - access.set_type(type); - uint64_t log2_size = 0; - ju_get_field(jk, "log2_size"s, log2_size, new_path); - access.set_log2_size(static_cast(log2_size)); - uint64_t address = 0; - ju_get_field(jk, "address"s, address, new_path); - access.set_address(address); - machine_hash read_hash; - ju_get_field(jk, "read_hash", read_hash, new_path); - access.set_read_hash(read_hash); - - not_default_constructible written_hash; - ju_get_opt_field(jk, "written_hash", written_hash, new_path); - if (written_hash.has_value()) { - access.set_written_hash(written_hash.value()); - } - - std::optional read; - ju_get_opt_field(jk, "read"s, read, new_path); - if (read.has_value()) { - access.set_read(std::move(read.value())); - } - std::optional written; - ju_get_opt_field(jk, "written"s, written, new_path); - if (written.has_value()) { - access.set_written(std::move(written.value())); - } - if (contains(jk, "sibling_hashes", new_path)) { - access.get_sibling_hashes().emplace(); - // 
NOLINTNEXTLINE(bugprone-unchecked-optional-access) - auto &sibling_hashes = access.get_sibling_hashes().value(); - ju_get_vector_like_field(jk, "sibling_hashes"s, sibling_hashes, new_path); - } -} - -template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, access &value, - const std::string &path); - -template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, access &value, - const std::string &path); - -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, bracket_type &value, const std::string &path) { - if (!contains(j, key, path)) { - return; - } - const auto &jk = j[key]; - if (!jk.is_string()) { - throw std::invalid_argument("\""s + path + to_string(key) + "\" not a string"); - } - value = bracket_type_from_name(jk.template get()); -} - -template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, bracket_type &value, - const std::string &path); - -template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, bracket_type &value, - const std::string &path); - -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, bracket_note &value, const std::string &path) { - if (!contains(j, key, path)) { - return; - } - const auto &jk = j[key]; - if (!jk.is_object()) { - throw std::invalid_argument("\""s + path + to_string(key) + "\" not an object"); - } - const auto new_path = path + to_string(key) + "/"; - ju_get_field(jk, "type"s, value.type, new_path); - ju_get_field(jk, "where"s, value.where, new_path); - ju_get_opt_field(jk, "text"s, value.text, new_path); -} - -template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, bracket_note &value, - const std::string &path); - -template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, bracket_note &value, - const std::string &path); - -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, not_default_constructible &optional, - const std::string &path) { - 
optional = {}; - if (!contains(j, key, path)) { - return; - } - const auto &jk = j[key]; - const auto new_path = path + to_string(key) + "/"; - bool has_annotations = false; - ju_get_field(jk, "has_annotations"s, has_annotations, new_path); - bool has_large_data = false; - ju_get_field(jk, "has_large_data"s, has_large_data, new_path); - optional.emplace(has_annotations, has_large_data); -} - -template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, - not_default_constructible &value, const std::string &path); - -template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, - not_default_constructible &value, const std::string &path); - -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, not_default_constructible &optional, - const std::string &path) { - optional = {}; - if (!contains(j, key, path)) { - return; - } - const auto &jk = j[key]; - const auto new_path = path + to_string(key) + "/"; - not_default_constructible log_type; - ju_get_field(jk, "log_type"s, log_type, new_path); - if (!log_type.has_value()) { - throw std::logic_error("log_type conversion bug"); - } - std::vector accesses; - ju_get_vector_like_field(jk, "accesses"s, accesses, new_path); - for (unsigned i = 0; i < accesses.size(); ++i) { - if (!accesses[i].get_sibling_hashes().has_value()) { - throw std::invalid_argument("\""s + new_path + "accesses/" + to_string(i) + "\" missing sibling hashes"); - } - } - std::vector brackets; - std::vector notes; - if (log_type.value().has_annotations()) { - ju_get_vector_like_field(jk, "notes"s, notes, new_path); - if (notes.size() != accesses.size()) { - throw std::invalid_argument( - "size of fields \""s + new_path + "accesses\" and \"" + new_path + "notes\" do not match"); - } - ju_get_vector_like_field(jk, "brackets"s, brackets, new_path); - for (unsigned i = 0; i < brackets.size(); ++i) { - if (brackets[i].where > accesses.size()) { - throw std::invalid_argument("\""s + new_path + "brackets/" + 
to_string(i) + "/where\" is out of range"); - } - } - } - optional.emplace(std::move(accesses), std::move(brackets), std::move(notes), log_type.value()); -} - -template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, - not_default_constructible &value, const std::string &path); - -template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, - not_default_constructible &value, const std::string &path); - template void ju_get_opt_field(const nlohmann::json &j, const K &key, registers_state &value, const std::string &path) { if (!contains(j, key, path)) { @@ -2180,47 +1937,6 @@ void to_json(nlohmann::json &j, const hash_tree_proof &p) { {"root_hash", encode_base64(p.get_root_hash())}, {"sibling_hashes", s}}; } -void to_json(nlohmann::json &j, const access &a) { - j = nlohmann::json{ - {"type", access_type_to_name(a.get_type())}, - {"address", a.get_address()}, - {"log2_size", a.get_log2_size()}, - }; - - j["read_hash"] = encode_base64(a.get_read_hash()); - if (a.get_read().has_value()) { - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - j["read"] = encode_base64(a.get_read().value()); - } - - if (a.get_type() == access_type::write) { - if (a.get_written_hash().has_value()) { - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - j["written_hash"] = encode_base64(a.get_written_hash().value()); - } - if (a.get_written().has_value()) { - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - j["written"] = encode_base64(a.get_written().value()); - } - } - if (a.get_sibling_hashes().has_value()) { - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - const auto &sibling_hashes = a.get_sibling_hashes().value(); - // Minimum logged data size is hash-tree word size - auto data_log2_size = std::max(a.get_log2_size(), HASH_TREE_LOG2_WORD_SIZE); - auto depth = HASH_TREE_LOG2_ROOT_SIZE - data_log2_size; - nlohmann::json s = nlohmann::json::array(); - for (int i = 0; i < depth; i++) { - 
s.push_back(encode_base64(sibling_hashes[i])); - } - j["sibling_hashes"] = s; - } -} - -void to_json(nlohmann::json &j, const bracket_note &b) { - j = nlohmann::json{{"type", bracket_type_to_name(b.type)}, {"where", b.where}, {"text", b.text}}; -} - void to_json(nlohmann::json &j, const uarch_interpreter_break_reason &break_reason) { j = uarch_interpreter_break_reason_to_name(break_reason); } @@ -2229,28 +1945,6 @@ void to_json(nlohmann::json &j, const interpreter_break_reason &break_reason) { j = interpreter_break_reason_to_name(break_reason); } -void to_json(nlohmann::json &j, const std::vector &bs) { - j = nlohmann::json::array(); - std::ranges::transform(bs, std::back_inserter(j), [](const bracket_note &b) -> nlohmann::json { return b; }); -} - -void to_json(nlohmann::json &j, const std::vector &as) { - j = nlohmann::json::array(); - std::ranges::transform(as, std::back_inserter(j), [](const access &a) -> nlohmann::json { return a; }); -} - -void to_json(nlohmann::json &j, const access_log::type &log_type) { - j = nlohmann::json{{"has_annotations", log_type.has_annotations()}, {"has_large_data", log_type.has_large_data()}}; -} - -void to_json(nlohmann::json &j, const access_log &log) { - j = nlohmann::json{{"log_type", log.get_log_type()}, {"accesses", log.get_accesses()}}; - if (log.get_log_type().has_annotations()) { - j["notes"] = log.get_notes(); - j["brackets"] = log.get_brackets(); - } -} - void to_json(nlohmann::json &j, const backing_store_config &config) { j = nlohmann::json{{"shared", config.shared}, {"create", config.create}, {"truncate", config.truncate}, {"data_filename", config.data_filename}, {"dht_filename", config.dht_filename}, diff --git a/src/json-util.hpp b/src/json-util.hpp index cd90d616e..61f630b53 100644 --- a/src/json-util.hpp +++ b/src/json-util.hpp @@ -26,10 +26,8 @@ #include -#include "access-log.hpp" #include "address-range-description.hpp" #include "back-merkle-tree.hpp" -#include "bracket-note.hpp" #include "hash-tree-proof.hpp" 
#include "hash-tree-stats.hpp" #include "interpret.hpp" @@ -303,109 +301,44 @@ template void ju_get_opt_field(const nlohmann::json &j, const K &key, std::optional &optional, const std::string &path = "params/"); -/// \brief Attempts to load a hash-tree proof object from a field in a JSON object -/// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) -/// \param j JSON object to load from -/// \param key Key to load value from -/// \param value Object to store value -/// \param path Path to j -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, not_default_constructible &value, - const std::string &path = "params/"); - -/// \brief Attempts to load a page_hash_tree_cache_stats object from a field in a JSON object -/// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) -/// \param j JSON object to load from -/// \param key Key to load value from -/// \param value Object to store value -/// \param path Path to j -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, page_hash_tree_cache_stats &value, - const std::string &path = "params/"); - -/// \brief Attempts to load a hash_tree_stats object from a field in a JSON object +/// \brief Attempts to load an array of hashes from a field in a JSON object /// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) /// \param j JSON object to load from /// \param key Key to load value from /// \param value Object to store value /// \param path Path to j template -void ju_get_opt_field(const nlohmann::json &j, const K &key, hash_tree_stats &value, +void ju_get_opt_field(const nlohmann::json &j, const K &key, machine_hashes &value, const std::string &path = "params/"); -/// \brief Attempts to load an access_type name from a field in a JSON object -/// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) -/// \param j JSON object to load 
from -/// \param key Key to load value from -/// \param value Object to store value -/// \param path Path to j -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, access_type &value, const std::string &path = "params/"); - -/// \brief Attempts to load an access_data object from a field in a JSON object -/// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) -/// \param j JSON object to load from -/// \param key Key to load value from -/// \param value Object to store value -/// \param path Path to j -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, access_data &data, const std::string &path = "params/"); - -/// \brief Attempts to load an access_data object from a field in a JSON object +/// \brief Attempts to load a hash-tree proof object from a field in a JSON object /// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) /// \param j JSON object to load from /// \param key Key to load value from /// \param value Object to store value /// \param path Path to j template -void ju_get_opt_field(const nlohmann::json &j, const K &key, std::optional &optional, +void ju_get_opt_field(const nlohmann::json &j, const K &key, not_default_constructible &value, const std::string &path = "params/"); -/// \brief Attempts to load an access object from a field in a JSON object -/// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) -/// \param j JSON object to load from -/// \param key Key to load value from -/// \param value Object to store value -/// \param path Path to j -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, access &access, const std::string &path = "params/"); - -/// \brief Attempts to load a bracket_type name from a field in a JSON object -/// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) -/// \param j JSON object to load from -/// 
\param key Key to load value from -/// \param value Object to store value -/// \param path Path to j -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, bracket_type &value, const std::string &path = "params/"); - -/// \brief Attempts to load a bracket_note object from a field in a JSON object -/// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) -/// \param j JSON object to load from -/// \param key Key to load value from -/// \param value Object to store value -/// \param path Path to j -template -void ju_get_opt_field(const nlohmann::json &j, const K &key, bracket_note &value, const std::string &path = "params/"); - -/// \brief Attempts to load an access_log type object from a field in a JSON object +/// \brief Attempts to load a page_hash_tree_cache_stats object from a field in a JSON object /// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) /// \param j JSON object to load from /// \param key Key to load value from /// \param value Object to store value /// \param path Path to j template -void ju_get_opt_field(const nlohmann::json &j, const K &key, not_default_constructible &optional, +void ju_get_opt_field(const nlohmann::json &j, const K &key, page_hash_tree_cache_stats &value, const std::string &path = "params/"); -/// \brief Attempts to load an access_log object from a field in a JSON object +/// \brief Attempts to load a hash_tree_stats object from a field in a JSON object /// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) /// \param j JSON object to load from /// \param key Key to load value from /// \param value Object to store value /// \param path Path to j template -void ju_get_opt_field(const nlohmann::json &j, const K &key, not_default_constructible &optional, +void ju_get_opt_field(const nlohmann::json &j, const K &key, hash_tree_stats &value, const std::string &path = "params/"); /// \brief 
Attempts to load a registers_state object from a field in a JSON object @@ -771,17 +704,11 @@ using base64_machine_hash = override_to_json; using base64_machine_hashes = override_to_json; // Automatic conversion functions from Cartesi types to nlohmann::json -void to_json(nlohmann::json &j, const access_log::type &log_type); void to_json(nlohmann::json &j, const base64_machine_hash &h); void to_json(nlohmann::json &j, const base64_machine_hashes &hs); void to_json(nlohmann::json &j, const hash_tree_proof &p); void to_json(nlohmann::json &j, const page_hash_tree_cache_stats &s); void to_json(nlohmann::json &j, const hash_tree_stats &s); -void to_json(nlohmann::json &j, const access &a); -void to_json(nlohmann::json &j, const bracket_note &b); -void to_json(nlohmann::json &j, const std::vector &bs); -void to_json(nlohmann::json &j, const std::vector &as); -void to_json(nlohmann::json &j, const access_log &log); void to_json(nlohmann::json &j, const interpreter_break_reason &break_reason); void to_json(nlohmann::json &j, const uarch_interpreter_break_reason &break_reason); void to_json(nlohmann::json &j, const backing_store_config &config); @@ -879,6 +806,10 @@ extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &k const std::string &base = "params/"); extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, machine_hash &value, const std::string &base = "params/"); +extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, machine_hashes &value, + const std::string &base = "params/"); +extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, machine_hashes &value, + const std::string &base = "params/"); extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, not_default_constructible &value, const std::string &base = "params/"); extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, @@ -891,38 
+822,10 @@ extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &k const std::string &base = "params/"); extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, hash_tree_stats &value, const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, access_type &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, access_type &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, access_data &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, access_data &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, access &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, access &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, bracket_type &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, bracket_type &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, bracket_note &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, bracket_note &value, - const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, - not_default_constructible &value, const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, - 
not_default_constructible &value, const std::string &base = "params/"); extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, uint32_t &value, const std::string &base = "params/"); extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, uint16_t &value, const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, - not_default_constructible &value, const std::string &base = "params/"); -extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, - not_default_constructible &value, const std::string &base = "params/"); extern template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, registers_state &value, const std::string &base = "params/"); extern template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, registers_state &value, diff --git a/src/jsonrpc-discover.json b/src/jsonrpc-discover.json index 5aedd8160..cf995b6d3 100644 --- a/src/jsonrpc-discover.json +++ b/src/jsonrpc-discover.json @@ -381,6 +381,14 @@ "schema": { "$ref": "#/components/schemas/UnsignedInteger" } + }, + { + "name": "revert_uarch_tail", + "description": "Hashes appended after the final mcycle to revert the uarch to its pristine state; empty when no revert is needed", + "required": false, + "schema": { + "$ref": "#/components/schemas/Base64HashArray" + } } ], "result": { @@ -393,22 +401,30 @@ }, { "name": "machine.log_step_uarch", - "summary": "Runs the small emulator for one cycle and return a log of state accesses", + "summary": "Runs the small emulator for the given cycle count (or halt) and writes a binary step log to a file", "params": [ { - "name": "log_type", - "description": "The maximum value of the cycle counter", + "name": "uarch_cycle_count", + "description": "Number of uarch cycles to advance; the run stops earlier on halt or overflow", "required": true, "schema": { - "$ref": 
"#/components/schemas/AccessLogType" + "$ref": "#/components/schemas/UnsignedInteger" + } + }, + { + "name": "filename", + "description": "Filename to store the binary step log", + "required": true, + "schema": { + "type": "string" } } ], "result": { - "name": "log", - "description": "Log of state accesses", + "name": "reason", + "description": "Reason the uarch step ended", "schema": { - "$ref": "#/components/schemas/AccessLog" + "$ref": "#/components/schemas/UarchInterpreterBreakReason" } } }, @@ -418,23 +434,31 @@ "params": [ { "name": "root_hash_before", - "description": "State hash before transition described by access log", + "description": "State hash before transition described by step log", "required": true, "schema": { "$ref": "#/components/schemas/Base64Hash" } }, { - "name": "log", - "description": "Access log describing transition", + "name": "filename", + "description": "Filename containing the binary step log", "required": true, "schema": { - "$ref": "#/components/schemas/AccessLog" + "type": "string" + } + }, + { + "name": "uarch_cycle_count", + "description": "Number of uarch cycles in step", + "required": true, + "schema": { + "$ref": "#/components/schemas/UnsignedInteger" } }, { "name": "root_hash_after", - "description": "State hash after transition described by access log", + "description": "State hash after transition described by step log", "required": true, "schema": { "$ref": "#/components/schemas/Base64Hash" @@ -455,23 +479,23 @@ "params": [ { "name": "root_hash_before", - "description": "State hash before transition described by access log", + "description": "State hash before transition described by step log", "required": true, "schema": { "$ref": "#/components/schemas/Base64Hash" } }, { - "name": "log", - "description": "Access log describing transition", + "name": "filename", + "description": "Filename containing the binary step log", "required": true, "schema": { - "$ref": "#/components/schemas/AccessLog" + "type": "string" } }, { 
"name": "root_hash_after", - "description": "State hash after transition described by access log", + "description": "State hash after transition described by step log", "required": true, "schema": { "$ref": "#/components/schemas/Base64Hash" @@ -498,6 +522,39 @@ } } }, + { + "name": "machine.read_revert_root_hash", + "summary": "Reads the revert root hash from the shadow state", + "params": [], + "result": { + "name": "hash", + "description": "Revert root hash", + "schema": { + "$ref": "#/components/schemas/Base64Hash" + } + } + }, + { + "name": "machine.write_revert_root_hash", + "summary": "Writes the revert root hash to the shadow state", + "params": [ + { + "name": "hash", + "description": "Revert root hash to store", + "required": true, + "schema": { + "$ref": "#/components/schemas/Base64Hash" + } + } + ], + "result": { + "name": "status", + "description": "True when operation succeeded", + "schema": { + "type": "boolean" + } + } + }, { "name": "machine.get_proof", "summary": "Obtains a hash-tree proof for a range in the machine state", @@ -913,22 +970,22 @@ }, { "name": "machine.log_reset_uarch", - "summary": "Reset uarch to pristine state and return a log of state accesses", + "summary": "Reset uarch to pristine state and write a binary step log to a file", "params": [ { - "name": "log_type", - "description": "The maximum value of the cycle counter", + "name": "filename", + "description": "Filename to store the binary step log", "required": true, "schema": { - "$ref": "#/components/schemas/AccessLogType" + "type": "string" } } ], "result": { - "name": "log", - "description": "Log of state accesses", + "name": "status", + "description": "True when operation succeeded", "schema": { - "$ref": "#/components/schemas/AccessLog" + "type": "boolean" } } }, @@ -984,6 +1041,14 @@ "name": "machine.send_cmio_response", "summary": "Sends cmio response.", "params": [ + { + "name": "revert_root_hash", + "description": "Root hash to store in the revert-root-hash shadow slot; 
the state to revert to if the input delivered by this response is rejected.", + "required": true, + "schema": { + "$ref": "#/components/schemas/Base64Hash" + } + }, { "name": "reason", "description": "Reason for sending response", @@ -1011,8 +1076,16 @@ }, { "name": "machine.log_send_cmio_response", - "summary": "Sends cmio response and returns an access log", + "summary": "Sends cmio response and writes a binary step log to a file", "params": [ + { + "name": "revert_root_hash", + "description": "Root hash stored in the revert-root-hash shadow slot, to revert to if the input delivered by this response is rejected.", + "required": true, + "schema": { + "$ref": "#/components/schemas/Base64Hash" + } + }, { "name": "reason", "description": "Reason for sending response", @@ -1030,19 +1103,19 @@ } }, { - "name": "log_type", - "description": "The log type to generate", + "name": "filename", + "description": "Filename to store the binary step log", "required": true, "schema": { - "$ref": "#/components/schemas/AccessLogType" + "type": "string" } } ], "result": { - "name": "log", - "description": "Log of state accesses", + "name": "status", + "description": "True when operation succeeded", "schema": { - "$ref": "#/components/schemas/AccessLog" + "type": "boolean" } } }, @@ -1050,6 +1123,14 @@ "name": "machine.verify_send_cmio_response", "summary": "Verifies a state transition caused by log_send_cmio_response", "params": [ + { + "name": "revert_root_hash", + "description": "Root hash that was stored in the revert-root-hash shadow slot.", + "required": true, + "schema": { + "$ref": "#/components/schemas/Base64Hash" + } + }, { "name": "reason", "description": "Reason for sending response", @@ -1075,11 +1156,11 @@ } }, { - "name": "log", - "description": "State access log to be verified.", + "name": "filename", + "description": "Filename containing the binary step log", "required": true, "schema": { - "$ref": "#/components/schemas/AccessLog" + "type": "string" } }, { @@ -2138,40 
+2219,6 @@ "$ref": "#/components/schemas/Base64Hash" } }, - "NoteArray": { - "title": "NoteArray", - "type": "array", - "items": { - "type": "string" - } - }, - "BracketArray": { - "title": "BracketArray", - "type": "array", - "items": { - "$ref": "#/components/schemas/Bracket" - } - }, - "BracketType": { - "title": "BracketType", - "enum": ["begin", "end"] - }, - "Bracket": { - "title": "Bracket", - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/BracketType" - }, - "where": { - "$ref": "#/components/schemas/UnsignedInteger" - }, - "text": { - "type": "string" - } - }, - "required": ["type", "where", "text"] - }, "Proof": { "title": "Proof", "type": "object", @@ -2204,80 +2251,6 @@ "sibling_hashes" ] }, - "Access": { - "title": "Access", - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/AccessType" - }, - "address": { - "$ref": "#/components/schemas/UnsignedInteger" - }, - "log2_size": { - "$ref": "#/components/schemas/UnsignedInteger" - }, - "read_hash": { - "$ref": "#/components/schemas/Base64Hash" - }, - "read": { - "$ref": "#/components/schemas/Base64String" - }, - "written_hash": { - "$ref": "#/components/schemas/Base64Hash" - }, - "written": { - "$ref": "#/components/schemas/Base64String" - }, - "sibling_hashes": { - "$ref": "#/components/schemas/Base64HashArray" - } - }, - "required": ["type", "address", "log2_size", "read_hash"] - }, - "AccessArray": { - "title": "AccessArray", - "type": "array", - "items": { - "$ref": "#/components/schemas/Access" - } - }, - "AccessType": { - "title": "AccessType", - "enum": ["read", "write"] - }, - "AccessLogType": { - "title": "AccessLogType", - "type": "object", - "properties": { - "has_annotations": { - "type": "boolean" - }, - "has_large_data": { - "type": "boolean" - } - }, - "required": ["has_annotations", "has_large_data"] - }, - "AccessLog": { - "title": "AccessLog", - "type": "object", - "properties": { - "log_type": { - "$ref": 
"#/components/schemas/AccessLogType" - }, - "accesses": { - "$ref": "#/components/schemas/AccessArray" - }, - "notes": { - "$ref": "#/components/schemas/NoteArray" - }, - "brackets": { - "$ref": "#/components/schemas/BracketArray" - } - }, - "required": ["log_type", "accesses"] - }, "SharingMode": { "title": "SharingMode", "enum": ["none", "config", "all"] diff --git a/src/jsonrpc-machine.cpp b/src/jsonrpc-machine.cpp index 56fc996bb..65e1d3e04 100644 --- a/src/jsonrpc-machine.cpp +++ b/src/jsonrpc-machine.cpp @@ -64,7 +64,6 @@ #include -#include "access-log.hpp" #include "address-range-description.hpp" #include "back-merkle-tree.hpp" #include "base64.hpp" @@ -789,13 +788,9 @@ void jsonrpc_machine::do_reset_uarch() { request("machine.reset_uarch", std::tie(), result); } -access_log jsonrpc_machine::do_log_reset_uarch(const access_log::type &log_type) { - not_default_constructible result; - request("machine.log_reset_uarch", std::tie(log_type), result); - if (!result.has_value()) { - throw std::runtime_error("jsonrpc server error: missing result"); - } - return std::move(result).value(); +void jsonrpc_machine::do_log_reset_uarch(const std::string &filename) { + bool result = false; + request("machine.log_reset_uarch", std::tie(filename), result); } machine_hash jsonrpc_machine::do_get_root_hash() const { @@ -810,6 +805,18 @@ machine_hash jsonrpc_machine::do_get_node_hash(uint64_t address, int log2_size) return hash; } +machine_hash jsonrpc_machine::do_read_revert_root_hash() const { + machine_hash hash; + request("machine.read_revert_root_hash", std::tie(), hash); + return hash; +} + +void jsonrpc_machine::do_write_revert_root_hash(const machine_hash &hash) { + bool result = false; + auto b64 = encode_base64(hash); + request("machine.write_revert_root_hash", std::tie(b64), result); +} + hash_tree_proof jsonrpc_machine::do_get_proof(uint64_t address, int log2_target_size, int log2_root_size) const { not_default_constructible result; request("machine.get_proof", 
std::tie(address, log2_target_size, log2_root_size), result); @@ -824,13 +831,11 @@ void jsonrpc_machine::do_replace_memory_range(const memory_range_config &new_ran request("machine.replace_memory_range", std::tie(new_range), result); } -access_log jsonrpc_machine::do_log_step_uarch(const access_log::type &log_type) { - not_default_constructible result; - request("machine.log_step_uarch", std::tie(log_type), result); - if (!result.has_value()) { - throw std::runtime_error("jsonrpc server error: missing result"); - } - return std::move(result).value(); +uarch_interpreter_break_reason jsonrpc_machine::do_log_step_uarch(uint64_t uarch_cycle_count, + const std::string &filename) { + uarch_interpreter_break_reason result = uarch_interpreter_break_reason::reached_target_cycle; + request("machine.log_step_uarch", std::tie(uarch_cycle_count, filename), result); + return result; } void jsonrpc_machine::do_destroy() { @@ -868,9 +873,11 @@ uarch_interpreter_break_reason jsonrpc_machine::do_run_uarch(uint64_t uarch_cycl } uarch_cycle_root_hashes jsonrpc_machine::do_collect_uarch_cycle_root_hashes(uint64_t mcycle_end, - int32_t log2_bundle_uarch_cycle_count) { + int32_t log2_bundle_uarch_cycle_count, const machine_hashes &revert_uarch_tail) { uarch_cycle_root_hashes result; - request("machine.collect_uarch_cycle_root_hashes", std::tie(mcycle_end, log2_bundle_uarch_cycle_count), result); + const base64_machine_hashes b64_revert_uarch_tail(revert_uarch_tail); + request("machine.collect_uarch_cycle_root_hashes", + std::tie(mcycle_end, log2_bundle_uarch_cycle_count, b64_revert_uarch_tail), result); return result; } @@ -880,21 +887,20 @@ address_range_descriptions jsonrpc_machine::do_get_address_ranges() const { return result; } -void jsonrpc_machine::do_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length) { +void jsonrpc_machine::do_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length) { 
bool result = false; std::string b64 = cartesi::encode_base64(std::span{data, length}); - request("machine.send_cmio_response", std::tie(reason, b64), result); + auto b64_revert_root_hash = encode_base64(revert_root_hash); + request("machine.send_cmio_response", std::tie(b64_revert_root_hash, reason, b64), result); } -access_log jsonrpc_machine::do_log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const access_log::type &log_type) { - not_default_constructible result; +void jsonrpc_machine::do_log_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length, const std::string &filename) { + bool result = false; std::string b64 = cartesi::encode_base64(std::span{data, length}); - request("machine.log_send_cmio_response", std::tie(reason, b64, log_type), result); - if (!result.has_value()) { - throw std::runtime_error("jsonrpc server error: missing result"); - } - return std::move(result).value(); + auto b64_revert_root_hash = encode_base64(revert_root_hash); + request("machine.log_send_cmio_response", std::tie(b64_revert_root_hash, reason, b64, filename), result); } uint64_t jsonrpc_machine::do_get_reg_address(reg r) const { @@ -919,30 +925,33 @@ interpreter_break_reason jsonrpc_machine::do_verify_step(const machine_hash &roo return result; } -void jsonrpc_machine::do_verify_step_uarch(const machine_hash &root_hash_before, const access_log &log, - const machine_hash &root_hash_after) const { +void jsonrpc_machine::do_verify_step_uarch(const machine_hash &root_hash_before, const std::string &filename, + uint64_t uarch_cycle_count, const machine_hash &root_hash_after) const { bool result = false; auto b64_root_hash_before = encode_base64(root_hash_before); auto b64_root_hash_after = encode_base64(root_hash_after); - request("machine.verify_step_uarch", std::tie(b64_root_hash_before, log, b64_root_hash_after), result); + request("machine.verify_step_uarch", + 
std::tie(b64_root_hash_before, filename, uarch_cycle_count, b64_root_hash_after), result); } -void jsonrpc_machine::do_verify_reset_uarch(const machine_hash &root_hash_before, const access_log &log, +void jsonrpc_machine::do_verify_reset_uarch(const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const { bool result = false; auto b64_root_hash_before = encode_base64(root_hash_before); auto b64_root_hash_after = encode_base64(root_hash_after); - request("machine.verify_reset_uarch", std::tie(b64_root_hash_before, log, b64_root_hash_after), result); + request("machine.verify_reset_uarch", std::tie(b64_root_hash_before, filename, b64_root_hash_after), result); } -void jsonrpc_machine::do_verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const machine_hash &root_hash_before, const access_log &log, const machine_hash &root_hash_after) const { +void jsonrpc_machine::do_verify_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length, const machine_hash &root_hash_before, const std::string &filename, + const machine_hash &root_hash_after) const { bool result = false; std::string b64_data = cartesi::encode_base64(std::span{data, length}); + auto b64_revert_root_hash = encode_base64(revert_root_hash); auto b64_root_hash_before = encode_base64(root_hash_before); auto b64_root_hash_after = encode_base64(root_hash_after); request("machine.verify_send_cmio_response", - std::tie(reason, b64_data, b64_root_hash_before, log, b64_root_hash_after), result); + std::tie(b64_revert_root_hash, reason, b64_data, b64_root_hash_before, filename, b64_root_hash_after), result); } bool jsonrpc_machine::do_is_jsonrpc_machine() const { diff --git a/src/jsonrpc-machine.hpp b/src/jsonrpc-machine.hpp index 11326acd0..67353b4ff 100644 --- a/src/jsonrpc-machine.hpp +++ b/src/jsonrpc-machine.hpp @@ -24,7 +24,6 @@ #include #include -#include 
"access-log.hpp" #include "address-range-description.hpp" #include "back-merkle-tree.hpp" #include "hash-tree-proof.hpp" @@ -132,12 +131,14 @@ class jsonrpc_machine final : public i_machine { uint64_t do_read_console_output(uint8_t *data, uint64_t max_length) override; uint64_t do_write_console_input(const uint8_t *data, uint64_t length) override; void do_reset_uarch() override; - access_log do_log_reset_uarch(const access_log::type &log_type) override; + void do_log_reset_uarch(const std::string &filename) override; machine_hash do_get_root_hash() const override; machine_hash do_get_node_hash(uint64_t address, int log2_size) const override; + machine_hash do_read_revert_root_hash() const override; + void do_write_revert_root_hash(const machine_hash &hash) override; hash_tree_proof do_get_proof(uint64_t address, int log2_target_size, int log2_root_size) const override; void do_replace_memory_range(const memory_range_config &new_range) override; - access_log do_log_step_uarch(const access_log::type &log_type) override; + uarch_interpreter_break_reason do_log_step_uarch(uint64_t uarch_cycle_count, const std::string &filename) override; machine_runtime_config do_get_runtime_config() const override; void do_set_runtime_config(const machine_runtime_config &r) override; void do_destroy() override; @@ -148,21 +149,22 @@ class jsonrpc_machine final : public i_machine { bool do_verify_hash_tree() const override; uarch_interpreter_break_reason do_run_uarch(uint64_t uarch_cycle_end) override; uarch_cycle_root_hashes do_collect_uarch_cycle_root_hashes(uint64_t mcycle_end, - int32_t log2_bundle_uarch_cycle_count) override; + int32_t log2_bundle_uarch_cycle_count, const machine_hashes &revert_uarch_tail) override; address_range_descriptions do_get_address_ranges() const override; - void do_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length) override; - access_log do_log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - 
const access_log::type &log_type) override; + void do_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length) override; + void do_log_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length, const std::string &filename) override; uint64_t do_get_reg_address(reg r) const override; machine_config do_get_default_config() const override; interpreter_break_reason do_verify_step(const machine_hash &root_hash_before, const std::string &log_filename, uint64_t mcycle_count, const machine_hash &root_hash_after) const override; - void do_verify_step_uarch(const machine_hash &root_hash_before, const access_log &log, + void do_verify_step_uarch(const machine_hash &root_hash_before, const std::string &filename, + uint64_t uarch_cycle_count, const machine_hash &root_hash_after) const override; + void do_verify_reset_uarch(const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const override; - void do_verify_reset_uarch(const machine_hash &root_hash_before, const access_log &log, - const machine_hash &root_hash_after) const override; - void do_verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const machine_hash &root_hash_before, const access_log &log, + void do_verify_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length, const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const override; bool do_is_jsonrpc_machine() const override; diff --git a/src/jsonrpc-remote-machine.cpp b/src/jsonrpc-remote-machine.cpp index 64aa8f318..2ac8156be 100644 --- a/src/jsonrpc-remote-machine.cpp +++ b/src/jsonrpc-remote-machine.cpp @@ -71,7 +71,6 @@ #include #endif -#include "access-log.hpp" #include "back-merkle-tree.hpp" #include "base64.hpp" #include "json-util.hpp" 
@@ -948,8 +947,10 @@ static json jsonrpc_machine_collect_mcycle_root_hashes(const json &j, const std: auto mcycle_phase = std::get<2>(args); auto log2_bundle_mcycle_count = std::get<3>(args); auto previous_back_tree = std::get<4>(args); + // check_int rejects values that would not survive the narrowing to int instead of letting them + // wrap to a small, valid-looking bundle size. const auto result = session->handler->machine->collect_mcycle_root_hashes(mcycle_end, mcycle_period, mcycle_phase, - static_cast(log2_bundle_mcycle_count), previous_back_tree); + check_int(log2_bundle_mcycle_count, param_name[3]), previous_back_tree); return jsonrpc_response_ok(j, result); } @@ -962,12 +963,15 @@ static json jsonrpc_machine_collect_uarch_cycle_root_hashes(const json &j, if (!session->handler->machine) { return jsonrpc_response_invalid_request(j, "no machine"); } - static const char *const param_name[] = {"mcycle_end", "log2_bundle_uarch_cycle_count"}; - auto args = parse_args(j, param_name); + static const char *const param_name[] = {"mcycle_end", "log2_bundle_uarch_cycle_count", "revert_uarch_tail"}; + auto args = parse_args>(j, param_name); auto mcycle_end = std::get<0>(args); auto log2_bundle_uarch_cycle_count = std::get<1>(args); + const auto revert_uarch_tail = std::get<2>(args).value_or(cartesi::machine_hashes{}); + // check_int rejects values that would not survive the narrowing to int instead of letting them + // wrap to a small, valid-looking bundle size. 
const auto result = session->handler->machine->collect_uarch_cycle_root_hashes(mcycle_end, - static_cast(log2_bundle_uarch_cycle_count)); + check_int(log2_bundle_uarch_cycle_count, param_name[1]), revert_uarch_tail); return jsonrpc_response_ok(j, result); } @@ -1104,10 +1108,10 @@ static json jsonrpc_machine_log_step_uarch_handler(const json &j, const std::sha if (!session->handler->machine) { return jsonrpc_response_invalid_request(j, "no machine"); } - static const char *const param_name[] = {"log_type"}; - auto args = parse_args>(j, param_name); - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - return jsonrpc_response_ok(j, session->handler->machine->log_step_uarch(std::get<0>(args).value())); + static const char *const param_name[] = {"uarch_cycle_count", "filename"}; + auto args = parse_args(j, param_name); + auto reason = session->handler->machine->log_step_uarch(std::get<0>(args), std::get<1>(args)); + return jsonrpc_response_ok(j, reason); } /// \brief JSONRPC handler for the machine.log_step_uarch method @@ -1118,13 +1122,13 @@ static json jsonrpc_machine_log_reset_uarch_handler(const json &j, const std::sh if (!session->handler->machine) { return jsonrpc_response_invalid_request(j, "no machine"); } - static const char *const param_name[] = {"log_type"}; - auto args = parse_args>(j, param_name); - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - return jsonrpc_response_ok(j, session->handler->machine->log_reset_uarch(std::get<0>(args).value())); + static const char *const param_name[] = {"filename"}; + auto args = parse_args(j, param_name); + session->handler->machine->log_reset_uarch(std::get<0>(args)); + return jsonrpc_response_ok(j); } -/// \brief JSONRPC handler for the machine.verify_send_cmio_response method +/// \brief JSONRPC handler for the machine.verify_step method /// \param j JSON request object /// \param session HTTP session /// \returns JSON response object @@ -1143,11 +1147,9 @@ static json 
jsonrpc_machine_verify_step_handler(const json &j, const std::shared /// \returns JSON response object static json jsonrpc_machine_verify_step_uarch_handler(const json &j, const std::shared_ptr & /*session*/) { - static const char *const param_name[] = {"root_hash_before", "log", "root_hash_after"}; - auto args = parse_args, - cartesi::machine_hash>(j, param_name); - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - cartesi::machine::verify_step_uarch(std::get<0>(args), std::get<1>(args).value(), std::get<2>(args)); + static const char *const param_name[] = {"root_hash_before", "filename", "uarch_cycle_count", "root_hash_after"}; + auto args = parse_args(j, param_name); + cartesi::machine::verify_step_uarch(std::get<0>(args), std::get<1>(args), std::get<2>(args), std::get<3>(args)); return jsonrpc_response_ok(j); } @@ -1157,11 +1159,9 @@ static json jsonrpc_machine_verify_step_uarch_handler(const json &j, /// \returns JSON response object static json jsonrpc_machine_verify_reset_uarch_handler(const json &j, const std::shared_ptr & /*session*/) { - static const char *const param_name[] = {"root_hash_before", "log", "root_hash_after"}; - auto args = parse_args, - cartesi::machine_hash>(j, param_name); - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - cartesi::machine::verify_reset_uarch(std::get<0>(args), std::get<1>(args).value(), std::get<2>(args)); + static const char *const param_name[] = {"root_hash_before", "filename", "root_hash_after"}; + auto args = parse_args(j, param_name); + cartesi::machine::verify_reset_uarch(std::get<0>(args), std::get<1>(args), std::get<2>(args)); return jsonrpc_response_ok(j); } @@ -1243,6 +1243,34 @@ static json jsonrpc_machine_get_node_hash_handler(const json &j, const std::shar return jsonrpc_response_ok(j, cartesi::base64_machine_hash(hash)); } +/// \brief JSONRPC handler for the machine.read_revert_root_hash method +/// \param j JSON request object +/// \param session HTTP session +/// \returns JSON response object 
+static json jsonrpc_machine_read_revert_root_hash_handler(const json &j, const std::shared_ptr &session) { + if (!session->handler->machine) { + return jsonrpc_response_invalid_request(j, "no machine"); + } + jsonrpc_check_no_params(j); + auto hash = session->handler->machine->read_revert_root_hash(); + return jsonrpc_response_ok(j, cartesi::base64_machine_hash(hash)); +} + +/// \brief JSONRPC handler for the machine.write_revert_root_hash method +/// \param j JSON request object +/// \param session HTTP session +/// \returns JSON response object +static json jsonrpc_machine_write_revert_root_hash_handler(const json &j, + const std::shared_ptr &session) { + if (!session->handler->machine) { + return jsonrpc_response_invalid_request(j, "no machine"); + } + static const char *const param_name[] = {"hash"}; + auto args = parse_args(j, param_name); + session->handler->machine->write_revert_root_hash(std::get<0>(args)); + return jsonrpc_response_ok(j); +} + /// \brief JSONRPC handler for the machine.read_word method /// \param j JSON request object /// \param session HTTP session @@ -1551,12 +1579,13 @@ static json jsonrpc_machine_send_cmio_response_handler(const json &j, const std: if (!session->handler->machine) { return jsonrpc_response_invalid_request(j, "no machine"); } - static const char *const param_name[] = {"reason", "data"}; - auto args = parse_args(j, param_name); - auto bin = cartesi::decode_base64(std::get<1>(args)); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - session->handler->machine->send_cmio_response(std::get<0>(args), reinterpret_cast(bin.data()), - bin.size()); + static const char *const param_name[] = {"revert_root_hash", "reason", "data"}; + auto args = parse_args(j, param_name); + auto bin = cartesi::decode_base64(std::get<2>(args)); + // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) + session->handler->machine->send_cmio_response(std::get<0>(args), std::get<1>(args), + reinterpret_cast(bin.data()), bin.size()); 
+ // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) return jsonrpc_response_ok(j); } @@ -1565,17 +1594,14 @@ static json jsonrpc_machine_log_send_cmio_response_handler(const json &j, if (!session->handler->machine) { return jsonrpc_response_invalid_request(j, "no machine"); } - static const char *const param_name[] = {"reason", "data", "log_type"}; - auto args = - parse_args>(j, param_name); - auto bin = cartesi::decode_base64(std::get<1>(args)); - // NOLINTBEGIN(bugprone-unchecked-optional-access) + static const char *const param_name[] = {"revert_root_hash", "reason", "data", "filename"}; + auto args = parse_args(j, param_name); + auto bin = cartesi::decode_base64(std::get<2>(args)); // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) - return jsonrpc_response_ok(j, - session->handler->machine->log_send_cmio_response(std::get<0>(args), - reinterpret_cast(bin.data()), bin.size(), std::get<2>(args).value())); + session->handler->machine->log_send_cmio_response(std::get<0>(args), std::get<1>(args), + reinterpret_cast(bin.data()), bin.size(), std::get<3>(args)); // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) - // NOLINTEND(bugprone-unchecked-optional-access) + return jsonrpc_response_ok(j); } /// \brief JSONRPC handler for the machine.verify_send_cmio_response method @@ -1584,17 +1610,17 @@ static json jsonrpc_machine_log_send_cmio_response_handler(const json &j, /// \returns JSON response object static json jsonrpc_machine_verify_send_cmio_response_handler(const json &j, const std::shared_ptr & /*session*/) { - static const char *const param_name[] = {"reason", "data", "root_hash_before", "log", "root_hash_after"}; - auto args = parse_args, cartesi::machine_hash>(j, param_name); + static const char *const param_name[] = {"revert_root_hash", "reason", "data", "root_hash_before", "filename", + "root_hash_after"}; + auto args = parse_args(j, param_name); - auto bin = cartesi::decode_base64(std::get<1>(args)); - // 
NOLINTBEGIN(bugprone-unchecked-optional-access) + auto bin = cartesi::decode_base64(std::get<2>(args)); // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) - cartesi::machine::verify_send_cmio_response(std::get<0>(args), reinterpret_cast(bin.data()), - bin.size(), std::get<2>(args), std::get<3>(args).value(), std::get<4>(args)); + cartesi::machine::verify_send_cmio_response(std::get<0>(args), std::get<1>(args), + reinterpret_cast(bin.data()), bin.size(), std::get<3>(args), std::get<4>(args), + std::get<5>(args)); // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) - // NOLINTEND(bugprone-unchecked-optional-access) return jsonrpc_response_ok(j); } @@ -1669,6 +1695,8 @@ static json jsonrpc_dispatch_method(const json &j, const std::shared_ptr #include -#include "access-log.hpp" #include "address-range-description.hpp" #include "back-merkle-tree.hpp" #include "hash-tree-proof.hpp" @@ -100,8 +99,9 @@ interpreter_break_reason local_machine::do_log_step(uint64_t mcycle_count, const return m_machine->log_step(mcycle_count, filename); } -access_log local_machine::do_log_step_uarch(const access_log::type &log_type) { - return get_machine()->log_step_uarch(log_type); +uarch_interpreter_break_reason local_machine::do_log_step_uarch(uint64_t uarch_cycle_count, + const std::string &filename) { + return get_machine()->log_step_uarch(uarch_cycle_count, filename); } hash_tree_proof local_machine::do_get_proof(uint64_t address, int log2_target_size, int log2_root_size) const { @@ -116,6 +116,14 @@ machine_hash local_machine::do_get_node_hash(uint64_t address, int log2_size) co return get_machine()->get_node_hash(address, log2_size); } +machine_hash local_machine::do_read_revert_root_hash() const { + return get_machine()->read_revert_root_hash(); +} + +void local_machine::do_write_revert_root_hash(const machine_hash &hash) { + get_machine()->write_revert_root_hash(hash); +} + bool local_machine::do_verify_hash_tree() const { return get_machine()->verify_hash_tree(); } 
@@ -193,8 +201,8 @@ void local_machine::do_reset_uarch() { get_machine()->reset_uarch(); } -access_log local_machine::do_log_reset_uarch(const access_log::type &log_type) { - return get_machine()->log_reset_uarch(log_type); +void local_machine::do_log_reset_uarch(const std::string &filename) { + get_machine()->log_reset_uarch(filename); } uarch_interpreter_break_reason local_machine::do_run_uarch(uint64_t uarch_cycle_end) { @@ -202,21 +210,22 @@ uarch_interpreter_break_reason local_machine::do_run_uarch(uint64_t uarch_cycle_ } uarch_cycle_root_hashes local_machine::do_collect_uarch_cycle_root_hashes(uint64_t mcycle_end, - int32_t log2_bundle_uarch_cycle_count) { - return get_machine()->collect_uarch_cycle_root_hashes(mcycle_end, log2_bundle_uarch_cycle_count); + int32_t log2_bundle_uarch_cycle_count, const machine_hashes &revert_uarch_tail) { + return get_machine()->collect_uarch_cycle_root_hashes(mcycle_end, log2_bundle_uarch_cycle_count, revert_uarch_tail); } address_range_descriptions local_machine::do_get_address_ranges() const { return get_machine()->get_address_ranges(); } -void local_machine::do_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length) { - get_machine()->send_cmio_response(reason, data, length); +void local_machine::do_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length) { + get_machine()->send_cmio_response(revert_root_hash, reason, data, length); } -access_log local_machine::do_log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const access_log::type &log_type) { - return get_machine()->log_send_cmio_response(reason, data, length, log_type); +void local_machine::do_log_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length, const std::string &filename) { + get_machine()->log_send_cmio_response(revert_root_hash, reason, data, length, filename); } uint64_t 
local_machine::do_get_reg_address(reg r) const { @@ -232,19 +241,21 @@ interpreter_break_reason local_machine::do_verify_step(const machine_hash &root_ return machine::verify_step(root_hash_before, log_filename, mcycle_count, root_hash_after); } -void local_machine::do_verify_step_uarch(const machine_hash &root_hash_before, const access_log &log, - const machine_hash &root_hash_after) const { - machine::verify_step_uarch(root_hash_before, log, root_hash_after); +void local_machine::do_verify_step_uarch(const machine_hash &root_hash_before, const std::string &filename, + uint64_t uarch_cycle_count, const machine_hash &root_hash_after) const { + machine::verify_step_uarch(root_hash_before, filename, uarch_cycle_count, root_hash_after); } -void local_machine::do_verify_reset_uarch(const machine_hash &root_hash_before, const access_log &log, +void local_machine::do_verify_reset_uarch(const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const { - machine::verify_reset_uarch(root_hash_before, log, root_hash_after); + machine::verify_reset_uarch(root_hash_before, filename, root_hash_after); } -void local_machine::do_verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const machine_hash &root_hash_before, const access_log &log, const machine_hash &root_hash_after) const { - machine::verify_send_cmio_response(reason, data, length, root_hash_before, log, root_hash_after); +void local_machine::do_verify_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length, const machine_hash &root_hash_before, const std::string &filename, + const machine_hash &root_hash_after) const { + machine::verify_send_cmio_response(revert_root_hash, reason, data, length, root_hash_before, filename, + root_hash_after); } } // namespace cartesi diff --git a/src/local-machine.hpp b/src/local-machine.hpp index 6837559fc..f6a6dc671 100644 --- 
a/src/local-machine.hpp +++ b/src/local-machine.hpp @@ -21,7 +21,6 @@ #include #include -#include "access-log.hpp" #include "address-range-description.hpp" #include "back-merkle-tree.hpp" #include "hash-tree-proof.hpp" @@ -62,10 +61,12 @@ class local_machine final : public i_machine { void do_store(const std::string &directory, sharing_mode sharing) const override; void do_clone_stored(const std::string &from_dir, const std::string &to_dir) const override; void do_remove_stored(const std::string &dir) const override; - access_log do_log_step_uarch(const access_log::type &log_type) override; + uarch_interpreter_break_reason do_log_step_uarch(uint64_t uarch_cycle_count, const std::string &filename) override; hash_tree_proof do_get_proof(uint64_t address, int log2_target_size, int log2_root_size) const override; machine_hash do_get_root_hash() const override; machine_hash do_get_node_hash(uint64_t address, int log2_size) const override; + machine_hash do_read_revert_root_hash() const override; + void do_write_revert_root_hash(const machine_hash &hash) override; bool do_verify_hash_tree() const override; uint64_t do_read_reg(reg r) const override; void do_write_reg(reg w, uint64_t val) override; @@ -85,24 +86,25 @@ class local_machine final : public i_machine { void do_set_runtime_config(const machine_runtime_config &r) override; void do_destroy() override; void do_reset_uarch() override; - access_log do_log_reset_uarch(const access_log::type &log_type) override; + void do_log_reset_uarch(const std::string &filename) override; uarch_interpreter_break_reason do_run_uarch(uint64_t uarch_cycle_end) override; uarch_cycle_root_hashes do_collect_uarch_cycle_root_hashes(uint64_t mcycle_end, - int32_t log2_bundle_uarch_cycle_count) override; + int32_t log2_bundle_uarch_cycle_count, const machine_hashes &revert_uarch_tail) override; address_range_descriptions do_get_address_ranges() const override; - void do_send_cmio_response(uint16_t reason, const unsigned char *data, 
uint64_t length) override; - access_log do_log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const access_log::type &log_type) override; + void do_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length) override; + void do_log_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length, const std::string &filename) override; uint64_t do_get_reg_address(reg r) const override; machine_config do_get_default_config() const override; interpreter_break_reason do_verify_step(const machine_hash &root_hash_before, const std::string &log_filename, uint64_t mcycle_count, const machine_hash &root_hash_after) const override; - void do_verify_step_uarch(const machine_hash &root_hash_before, const access_log &log, + void do_verify_step_uarch(const machine_hash &root_hash_before, const std::string &filename, + uint64_t uarch_cycle_count, const machine_hash &root_hash_after) const override; + void do_verify_reset_uarch(const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const override; - void do_verify_reset_uarch(const machine_hash &root_hash_before, const access_log &log, - const machine_hash &root_hash_after) const override; - void do_verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const machine_hash &root_hash_before, const access_log &log, + void do_verify_send_cmio_response(const machine_hash &revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length, const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) const override; machine *get_machine(); diff --git a/src/machine-hash.hpp b/src/machine-hash.hpp index c539040ba..5233b2b20 100644 --- a/src/machine-hash.hpp +++ b/src/machine-hash.hpp @@ -23,7 +23,8 @@ #include #include #include -#ifndef ZKARCHITECTURE +// 
std::vector is hosted-only; the freestanding guest builds (uarch, zkVM) must not pull it in. +#if !defined(MICROARCHITECTURE) && !defined(ZKARCHITECTURE) #include #endif @@ -33,7 +34,7 @@ static constexpr size_t MACHINE_HASH_SIZE = 32; using machine_hash = std::array; using machine_hash_view = std::span; using const_machine_hash_view = std::span; -#ifndef ZKARCHITECTURE +#if !defined(MICROARCHITECTURE) && !defined(ZKARCHITECTURE) using machine_hashes = std::vector; #endif diff --git a/src/machine.cpp b/src/machine.cpp index 07aa6328b..2252143f5 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -31,7 +31,6 @@ #include #include -#include "access-log.hpp" #include "address-range-constants.hpp" #include "address-range.hpp" #include "algorithm.hpp" @@ -63,9 +62,8 @@ #include "pmas-constants.hpp" #include "pmas.hpp" #include "processor-state.hpp" -#include "record-send-cmio-state-access.hpp" #include "record-step-state-access.hpp" -#include "replay-send-cmio-state-access.hpp" +#include "rejected-manual-yield.hpp" #include "replay-step-state-access.hpp" #include "riscv-constants.hpp" #include "rtc.hpp" @@ -76,14 +74,15 @@ #include "shadow-tlb.hpp" #include "shadow-uarch-state.hpp" #include "state-access.hpp" +#include "step-pretty-printer.hpp" #include "strict-aliasing.hpp" #include "translate-virtual-address.hpp" #include "uarch-constants.hpp" #include "uarch-cycle-root-hashes.hpp" #include "uarch-interpret.hpp" #include "uarch-pristine.hpp" -#include "uarch-record-state-access.hpp" -#include "uarch-replay-state-access.hpp" +#include "uarch-record-step-state-access.hpp" +#include "uarch-replay-step-state-access.hpp" #include "uarch-reset-state.hpp" #include "uarch-state-access.hpp" #include "uarch-step.hpp" @@ -1403,25 +1402,29 @@ uint64_t machine::get_reg_address(reg r) { throw std::domain_error{"invalid register"}; } -void machine::mark_write_tlb_dirty_pages() const { - for (uint64_t slot_index = 0; slot_index < TLB_SET_SIZE; ++slot_index) { - auto vaddr_page = 
m_s->penumbra.tlb[TLB_WRITE][slot_index].vaddr_page; - if (vaddr_page == TLB_UNVERIFIED_PAGE) { - vaddr_page = init_hot_tlb_slot(TLB_WRITE, slot_index); +void machine::mark_write_tlb_dirty_page(uint64_t slot_index) const { + auto vaddr_page = m_s->penumbra.tlb[TLB_WRITE][slot_index].vaddr_page; + if (vaddr_page == TLB_UNVERIFIED_PAGE) { + vaddr_page = init_hot_tlb_slot(TLB_WRITE, slot_index); + } + if (vaddr_page != TLB_INVALID_PAGE) { + const auto &shadow_slot = m_s->shadow.tlb[TLB_WRITE][slot_index]; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + auto &ar = const_cast(read_pma(shadow_slot.pma_index)); + if (!ar.is_memory()) { + throw std::runtime_error{"could not mark dirty page for a TLB entry: TLB is corrupt"}; } - if (vaddr_page != TLB_INVALID_PAGE) { - const auto &shadow_slot = m_s->shadow.tlb[TLB_WRITE][slot_index]; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) - auto &ar = const_cast(read_pma(shadow_slot.pma_index)); - if (!ar.is_memory()) { - throw std::runtime_error{"could not mark dirty page for a TLB entry: TLB is corrupt"}; - } - auto paddr_page = vaddr_page + shadow_slot.vp_offset; - if (!ar.contains_absolute(paddr_page, AR_PAGE_SIZE)) { - throw std::runtime_error{"could not mark dirty page for a TLB entry: TLB is corrupt"}; - } - ar.get_dirty_page_tree().mark_dirty_page_and_up(paddr_page - ar.get_start()); + auto paddr_page = vaddr_page + shadow_slot.vp_offset; + if (!ar.contains_absolute(paddr_page, AR_PAGE_SIZE)) { + throw std::runtime_error{"could not mark dirty page for a TLB entry: TLB is corrupt"}; } + ar.get_dirty_page_tree().mark_dirty_page_and_up(paddr_page - ar.get_start()); + } +} + +void machine::mark_write_tlb_dirty_pages() const { + for (uint64_t slot_index = 0; slot_index < TLB_SET_SIZE; ++slot_index) { + mark_write_tlb_dirty_page(slot_index); } } @@ -1438,6 +1441,14 @@ machine_hash machine::get_root_hash() const { return m_ht.get_root_hash(); } +machine_hash machine::read_revert_root_hash() const { + return 
m_s->shadow.revert_root_hash; +} + +void machine::write_revert_root_hash(const_machine_hash_view hash) { + std::ranges::copy(hash, m_s->shadow.revert_root_hash.begin()); +} + const char *machine::get_what_name(uint64_t paddr) { if (paddr >= AR_UARCH_RAM_START && paddr - AR_UARCH_RAM_START < AR_UARCH_RAM_LENGTH) { return "uarch.ram"; @@ -1598,6 +1609,9 @@ void machine::write_memory(uint64_t paddr, const unsigned char *data, uint64_t l // Handle special case for writing to shadow memory, allowing manual snapshots // for machines with shared layouts via read_memory()/write_memory() if (paddr == AR_SHADOW_STATE_START && length == AR_SHADOW_STATE_LENGTH) { + // This bypasses write_verified_tlb/write_unverified_tlb, where outgoing write-TLB pages are + // now marked dirty, so mark the resident ones before the overwrite discards their mappings. + mark_write_tlb_dirty_pages(); // Overwrite the processor shadow state with the provided data static_assert(AR_SHADOW_STATE_LENGTH == sizeof(m_s->shadow)); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) @@ -1795,41 +1809,89 @@ void machine::write_word(uint64_t paddr, uint64_t val) { ar.get_dirty_page_tree().mark_dirty_page_and_up(offset); } -void machine::send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length) { +void machine::check_pending_cmio_request(const_machine_hash_view revert_root_hash, uint16_t reason, + uint64_t length) const { + // The core send_cmio_response cannot fail. It turns detected failures into no-ops, so the + // honest party can always log and prove the resulting state transition. The host-facing + // send refuses these no-ops upfront instead. The checks run before any state changes, so + // a failed call leaves the machine unchanged. 
+ if (read_reg(reg::iflags_Y) == 0) { + throw std::invalid_argument{"iflags.Y is not set"}; + } + if (length > AR_CMIO_RX_BUFFER_LENGTH) { + throw std::invalid_argument{"CMIO response data is too large"}; + } + // Only advance-state responses are checked further. They are the input boundary of the + // rollups flow, whose revert-on-reject scheme depends on the preconditions below. + // Inspect-state queries and GIO responses get no further checks. + if (reason != HTIF_YIELD_REASON_ADVANCE_STATE) { + return; + } + // The machine must be waiting for an input on an rx-accepted manual yield. + if (read_reg(reg::htif_tohost_dev) != HTIF_DEV_YIELD || read_reg(reg::htif_tohost_cmd) != HTIF_YIELD_CMD_MANUAL || + read_reg(reg::htif_tohost_reason) != HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED) { + throw std::invalid_argument{"machine is not waiting on an rx-accepted manual yield"}; + } + // The recorded revert root hash must be the hash of the machine about to receive the + // input, the state a rejection later reverts to + if (!std::ranges::equal(revert_root_hash, get_root_hash())) { + throw std::invalid_argument{"revert root hash does not match the machine root hash"}; + } +} + +void machine::send_cmio_response(const_machine_hash_view revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length) { + check_pending_cmio_request(revert_root_hash, reason, length); const state_access a(*this); - cartesi::send_cmio_response(a, reason, data, length); + cartesi::send_cmio_response(a, revert_root_hash, reason, data, length); } -access_log machine::log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const access_log::type &log_type) { +void machine::log_send_cmio_response(const_machine_hash_view revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length, const std::string &filename) { if (m_c.hash_tree.hash_function != hash_function_type::keccak256) { throw std::runtime_error{ - "access logs can only be used with 
hash tree configured with Keccak-256 hash function"}; + "send cmio response logs can only be used with hash tree configured with Keccak-256 hash function"}; } - auto root_hash_before = get_root_hash(); - access_log log(log_type); - // Call send_cmio_response with the recording state accessor - const record_send_cmio_state_access a(*this, log); - { - [[maybe_unused]] auto note = a.make_scoped_note("send_cmio_response"); - cartesi::send_cmio_response(a, reason, data, length); + // The shared send_cmio_response core takes a uint32 data length (it transpiles to Solidity), so a + // larger length would be silently narrowed and could log a transition that differs from these + // arguments. The host-facing send is bounded earlier by check_pending_cmio_request's rx-buffer check. + if (length > UINT32_MAX) { + throw std::invalid_argument{"CMIO response data length does not fit in 32 bits"}; } + auto root_hash_before = get_root_hash(); + record_step_state_access::context context(filename, m_c.hash_tree.hash_function); + record_step_state_access a(context, *this); + cartesi::send_cmio_response(a, revert_root_hash, reason, data, length); + // send_cmio_response is not a step. Even when it no-ops on a machine paused on a rejected input, + // its transition is the identity, so the post-operation hash is the plain machine root hash with no + // revert substitution. 
auto root_hash_after = get_root_hash(); - verify_send_cmio_response(reason, data, length, root_hash_before, log, root_hash_after); - return log; + a.finish(root_hash_before, 0, root_hash_after); + verify_send_cmio_response(revert_root_hash, reason, data, length, root_hash_before, filename, root_hash_after); } -void machine::verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const machine_hash &root_hash_before, const access_log &log, const machine_hash &root_hash_after) { - replay_send_cmio_state_access::context context{log, root_hash_before, hash_function_type::keccak256}; - // Verify all intermediate state transitions - replay_send_cmio_state_access a(context); - cartesi::send_cmio_response(a, reason, data, length); - a.finish(); - // Make sure the access log ends at the same root hash as the state - auto obtained_root_hash = a.get_root_hash(); - if (obtained_root_hash != root_hash_after) { - throw std::invalid_argument{"mismatch in root hash after replay"}; +void machine::verify_send_cmio_response(const_machine_hash_view revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length, const machine_hash &root_hash_before, const std::string &filename, + const machine_hash &root_hash_after) { + // See log_send_cmio_response: the core narrows length to uint32, so reject what would not fit. 
+ if (length > UINT32_MAX) { + throw std::invalid_argument{"CMIO response data length does not fit in 32 bits"}; + } + auto data_length = os::file_size(filename); + auto mapped_data = os::mapped_memory(data_length, os::mapped_memory_flags{}, filename); + replay_step_state_access::context context; + replay_step_state_access a(context, mapped_data.get_ptr(), data_length); + if (context.log.root_hash_before != root_hash_before) { + throw std::runtime_error("root hash before mismatch: argument does not match step log header"); + } + if (context.log.requested_cycle_count != 0) { + throw std::runtime_error("requested_cycle_count must be zero in send_cmio_response log"); + } + cartesi::send_cmio_response(a, revert_root_hash, reason, data, length); + // send_cmio_response is not a step: a no-op on a rejected machine is the identity, never a revert + a.finish(false); + if (context.log.root_hash_after != root_hash_after) { + throw std::runtime_error("root hash after mismatch: argument does not match step log header"); } } @@ -1852,42 +1914,48 @@ void machine::reset_uarch() { } } -access_log machine::log_reset_uarch(const access_log::type &log_type) { +void machine::log_reset_uarch(const std::string &filename) { if (m_c.hash_tree.hash_function != hash_function_type::keccak256) { throw std::runtime_error{ "microarchitecture can only be used with hash tree configured with Keccak-256 hash function"}; } - const machine_hash root_hash_before = get_root_hash(); - // Call uarch_reset_state with a uarch_record_state_access object - access_log log(log_type); - uarch_record_state_access a(*this, log); - { - [[maybe_unused]] auto note = a.make_scoped_note("reset_uarch_state"); - uarch_reset_state(a); + auto root_hash_before = get_root_hash(); + uarch_record_step_state_access::context context(filename, m_c.hash_tree.hash_function); + uarch_record_step_state_access a(context, *this); + uarch_reset_state(a); + // get_root_hash() also updates the hash tree, which finish() relies on to 
record node hashes. + // When the machine has rejected an input, the canonical root hash after the reset is the recorded + // revert root hash (the physical machine only has its uarch reset; iflags.Y and htif.tohost are + // unchanged), so the computed value is replaced -- but the tree update must still happen first. + auto root_hash_after = get_root_hash(); + const state_access sa(*this); + if (is_rejected_manual_yield(sa)) { + root_hash_after = read_revert_root_hash(); } - const auto root_hash_after = get_root_hash(); - verify_reset_uarch(root_hash_before, log, root_hash_after); - return log; + a.finish(root_hash_before, 0, root_hash_after); + verify_reset_uarch(root_hash_before, filename, root_hash_after); } -void machine::verify_reset_uarch(const machine_hash &root_hash_before, const access_log &log, +void machine::verify_reset_uarch(const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after) { - // Verify all intermediate state transitions - uarch_replay_state_access::context context{log, root_hash_before}; - uarch_replay_state_access a(context); + auto data_length = os::file_size(filename); + auto mapped_data = os::mapped_memory(data_length, os::mapped_memory_flags{}, filename); + uarch_replay_step_state_access<>::context context; + uarch_replay_step_state_access<> a(context, mapped_data.get_ptr(), data_length); + if (context.log.root_hash_before != root_hash_before) { + throw std::runtime_error("root hash before mismatch: argument does not match step log header"); + } + if (context.log.requested_cycle_count != 0) { + throw std::runtime_error("requested_cycle_count must be zero in reset_uarch log"); + } uarch_reset_state(a); a.finish(); - // Make sure the access log ends at the same root hash as the state - const machine_hash obtained_root_hash = a.get_root_hash(); - if (obtained_root_hash != root_hash_after) { - throw std::invalid_argument{"mismatch in root hash after replay"}; + if (context.log.root_hash_after != 
root_hash_after) { + throw std::runtime_error("root hash after mismatch: argument does not match step log header"); } } -// Declaration of explicit instantiation in module uarch-step.cpp -extern template UArchStepStatus uarch_step(uarch_record_state_access &a); - -access_log machine::log_step_uarch(const access_log::type &log_type) { +uarch_interpreter_break_reason machine::log_step_uarch(uint64_t uarch_cycle_count, const std::string &filename) { if (is_unreproducible()) { throw std::runtime_error("microarchitecture cannot be used with unreproducible machines"); } @@ -1896,36 +1964,46 @@ access_log machine::log_step_uarch(const access_log::type &log_type) { "microarchitecture can only be used with hash tree configured with Keccak-256 hash function"}; } auto root_hash_before = get_root_hash(); - access_log log(log_type); - // Call interpret with a logged state access object - const uarch_record_state_access a(*this, log); - { - [[maybe_unused]] auto note = a.make_scoped_note("step"); - uarch_step(a); - } - // Verify access log before returning + uarch_record_step_state_access::context context(filename, m_c.hash_tree.hash_function); + uarch_record_step_state_access a(context, *this); + const uint64_t uarch_cycle_end = saturating_add(a.read_uarch_cycle(), uarch_cycle_count); + const auto break_reason = uarch_interpret(a, uarch_cycle_end); auto root_hash_after = get_root_hash(); - verify_step_uarch(root_hash_before, log, root_hash_after); - return log; + a.finish(root_hash_before, uarch_cycle_count, root_hash_after); + verify_step_uarch(root_hash_before, filename, uarch_cycle_count, root_hash_after); + return break_reason; } -// Declaration of explicit instantiation in module uarch-step.cpp -extern template UArchStepStatus uarch_step(uarch_replay_state_access &a); - -void machine::verify_step_uarch(const machine_hash &root_hash_before, const access_log &log, - const machine_hash &root_hash_after) { - // Verify all intermediate state transitions - 
uarch_replay_state_access::context context{log, root_hash_before}; - uarch_replay_state_access a(context); - uarch_step(a); +void machine::verify_step_uarch(const machine_hash &root_hash_before, const std::string &filename, + uint64_t uarch_cycle_count, const machine_hash &root_hash_after) { + auto data_length = os::file_size(filename); + auto mapped_data = os::mapped_memory(data_length, os::mapped_memory_flags{}, filename); + uarch_replay_step_state_access<>::context context; + uarch_replay_step_state_access<> a(context, mapped_data.get_ptr(), data_length); + if (context.log.root_hash_before != root_hash_before) { + throw std::runtime_error("root hash before mismatch: argument does not match step log header"); + } + if (context.log.requested_cycle_count != uarch_cycle_count) { + throw std::runtime_error("uarch cycle count mismatch: argument does not match step log header"); + } + const uint64_t uarch_cycle_end = saturating_add(a.read_uarch_cycle(), context.log.requested_cycle_count); + uarch_interpret(a, uarch_cycle_end); a.finish(); - // Make sure the access log ends at the same root hash as the state - auto obtained_root_hash = a.get_root_hash(); - if (obtained_root_hash != root_hash_after) { - throw std::invalid_argument{"mismatch in root hash after replay"}; + if (context.log.root_hash_after != root_hash_after) { + throw std::runtime_error("root hash after mismatch: argument does not match step log header"); } } +std::string machine::pretty_print_step_uarch(const std::string &filename) { + auto data_length = os::file_size(filename); + auto mapped_data = os::mapped_memory(data_length, os::mapped_memory_flags{}, filename); + uarch_replay_step_state_access::context context; + const uarch_replay_step_state_access a(context, mapped_data.get_ptr(), data_length); + const uint64_t uarch_cycle_end = saturating_add(a.read_uarch_cycle(), context.log.requested_cycle_count); + uarch_interpret(a, uarch_cycle_end); + return context.printer.str(); +} + machine_config 
machine::get_default_config() { return machine_config{}; } @@ -1977,7 +2055,14 @@ interpreter_break_reason machine::log_step(uint64_t mcycle_count, const std::str record_step_state_access a(context, *this); const uint64_t mcycle_end = saturating_add(a.read_mcycle(), mcycle_count); auto break_reason = interpret(a, mcycle_end); + // get_root_hash() also updates the hash tree, which finish() relies on to record node/page hashes. + // When the machine has rejected an input, the canonical root hash after the step is the recorded + // revert root hash, so the computed value is replaced -- but the tree update must still happen first. auto root_hash_after = get_root_hash(); + const state_access sa(*this); + if (is_rejected_manual_yield(sa)) { + root_hash_after = read_revert_root_hash(); + } a.finish(root_hash_before, mcycle_count, root_hash_after); verify_step(root_hash_before, filename, mcycle_count, root_hash_after); return break_reason; @@ -1991,16 +2076,16 @@ interpreter_break_reason machine::verify_step(const machine_hash &root_hash_befo // Constructor reads log header, validates computed initial hash == logged initial hash replay_step_state_access a(context, mapped_data.get_ptr(), data_length); // logged initial hash matches computed initial hash - if (context.logged_root_hash_before != root_hash_before) { + if (context.log.root_hash_before != root_hash_before) { throw std::runtime_error("root hash before mismatch: argument does not match step log header"); } - if (context.logged_mcycle_count != mcycle_count) { + if (context.log.requested_cycle_count != mcycle_count) { throw std::runtime_error("mcycle count mismatch: argument does not match step log header"); } - const uint64_t mcycle_end = saturating_add(a.read_mcycle(), context.logged_mcycle_count); + const uint64_t mcycle_end = saturating_add(a.read_mcycle(), context.log.requested_cycle_count); auto break_reason = interpret(a, mcycle_end); a.finish(); // validates computed final hash == logged final hash - if 
(context.logged_root_hash_after != root_hash_after) { + if (context.log.root_hash_after != root_hash_after) { throw std::runtime_error("root hash after mismatch: argument does not match step log header"); } return break_reason; @@ -2164,6 +2249,7 @@ mcycle_root_hashes machine::collect_mcycle_root_hashes(uint64_t mcycle_end, uint uint64_t mcycle_target = saturating_add(mcycle_start, mcycle_period - mcycle_phase, mcycle_end); uint64_t mcycle_reached = read_reg(reg::mcycle); bool at_fixed_point = false; + machine_hash root_hash{}; // Run until reaching next mcycle target while (mcycle_reached < mcycle_target) { @@ -2211,8 +2297,13 @@ mcycle_root_hashes machine::collect_mcycle_root_hashes(uint64_t mcycle_end, uint // Add the current root hash to the back tree whenever we reach a period boundary or a fixed point // This ensures we only append at the correct intervals, even when mcycle_end does not align with the period if (result.mcycle_phase == 0 || at_fixed_point) { + // When the machine has rejected an input, the canonical root hash from the yield onward + // is the recorded revert root hash + const state_access sa(*this); + root_hash = is_rejected_manual_yield(sa) ? 
read_revert_root_hash() : m_ht.get_root_hash(); + // Append root hash relative to this period to the result - back_tree.push_back(m_ht.get_root_hash()); + back_tree.push_back(root_hash); // When back tree is full, we can append the bundled root hash and reset it if (back_tree.full()) { @@ -2233,9 +2324,9 @@ mcycle_root_hashes machine::collect_mcycle_root_hashes(uint64_t mcycle_end, uint // If the machine yielded manually or halted, then append bundled root hashes with padding if (at_fixed_point && log2_bundle_mcycle_count > 0) { - // Construct pad tree containing repetitions of the current root hash - const auto pad_hashes = back_merkle_tree::make_pad_hashes(m_ht.get_root_hash(), log2_bundle_mcycle_count, - m_c.hash_tree.hash_function); + // Construct pad tree containing repetitions of the last collected root hash + const auto pad_hashes = + back_merkle_tree::make_pad_hashes(root_hash, log2_bundle_mcycle_count, m_c.hash_tree.hash_function); // Pad back tree when partially filled and append its bundled root hash if (!back_tree.empty()) { @@ -2257,8 +2348,78 @@ mcycle_root_hashes machine::collect_mcycle_root_hashes(uint64_t mcycle_end, uint return result; } +/// \brief Appends the root hash after one uarch cycle to the collection result. +/// \param result Collection result receiving hashes and bundled root hashes. +/// \param back_tree Back tree bundling the root hashes. +/// \param cycle_root_hash Root hash after the uarch cycle. +static void append_uarch_cycle_root_hash(uarch_cycle_root_hashes &result, back_merkle_tree &back_tree, + const machine_hash &cycle_root_hash) { + back_tree.push_back(cycle_root_hash); + + // When back tree is full, we can append the bundled root hash and reset it + if (back_tree.full()) { + result.hashes.emplace_back(back_tree.get_root_hash()); + back_tree.clear(); + } +} + +/// \brief Appends the root hash after the uarch reset that ends one mcycle to the collection result. 
+/// \param result Collection result receiving hashes and bundled root hashes. +/// \param back_tree Back tree bundling the root hashes. +/// \param log2_bundle_uarch_cycle_count Log base 2 of the amount of uarch cycle root hashes to bundle. +/// \param halt_root_hash Root hash after the uarch halted, padding the bundles up to the reset entry. +/// \param reset_root_hash Root hash after the uarch reset. +static void append_uarch_reset_root_hash(uarch_cycle_root_hashes &result, back_merkle_tree &back_tree, + int32_t log2_bundle_uarch_cycle_count, const machine_hash &halt_root_hash, const machine_hash &reset_root_hash) { + if (log2_bundle_uarch_cycle_count > 0) { + const auto halt_pad_hashes = back_merkle_tree::make_pad_hashes(halt_root_hash, log2_bundle_uarch_cycle_count, + back_tree.get_hash_function()); + + // Pad back tree when partially filled and then append its bundled root hash + if (!back_tree.empty()) { + assert(!back_tree.full()); + back_tree.pad_back(back_tree.get_remaining_leaf_count(), halt_pad_hashes); + result.hashes.emplace_back(back_tree.get_root_hash()); + back_tree.clear(); + } + + // Append bundled root hash containing only repetitions of the halt root hash + result.hashes.emplace_back(halt_pad_hashes[log2_bundle_uarch_cycle_count]); + + // Append bundled root hash containing repetitions of the halt root hash on the left + // and one reset root hash on the right + assert(back_tree.empty()); + back_tree.pad_back((UINT64_C(1) << log2_bundle_uarch_cycle_count) - 1, halt_pad_hashes); + back_tree.push_back(reset_root_hash); + assert(back_tree.full()); + result.hashes.emplace_back(back_tree.get_root_hash()); + back_tree.clear(); + } else { + result.hashes.push_back(reset_root_hash); + } + + // Add the index where reset happened + result.reset_indices.emplace_back(result.hashes.size() - 1); +} + +/// \brief Appends the period of the reverted machine to the collection result. +/// \param result Collection result receiving hashes and bundled root hashes. 
+/// \param back_tree Back tree bundling the root hashes. +/// \param log2_bundle_uarch_cycle_count Log base 2 of the amount of uarch cycle root hashes to bundle. +/// \param revert_uarch_tail Root hashes after each uarch cycle of the reverted machine period, +/// the last being the revert root hash itself, which is the reset entry of the period. +static void append_revert_uarch_tail_period(uarch_cycle_root_hashes &result, back_merkle_tree &back_tree, + int32_t log2_bundle_uarch_cycle_count, const machine_hashes &revert_uarch_tail) { + assert(revert_uarch_tail.size() >= 2); + for (size_t i = 0; i + 1 < revert_uarch_tail.size(); ++i) { + append_uarch_cycle_root_hash(result, back_tree, revert_uarch_tail[i]); + } + append_uarch_reset_root_hash(result, back_tree, log2_bundle_uarch_cycle_count, + revert_uarch_tail[revert_uarch_tail.size() - 2], revert_uarch_tail.back()); +} + uarch_cycle_root_hashes machine::collect_uarch_cycle_root_hashes(uint64_t mcycle_end, - int32_t log2_bundle_uarch_cycle_count) { + int32_t log2_bundle_uarch_cycle_count, const machine_hashes &revert_uarch_tail) { const uint64_t mcycle_start = read_reg(reg::mcycle); // Check preconditions @@ -2282,6 +2443,28 @@ uarch_cycle_root_hashes machine::collect_uarch_cycle_root_hashes(uint64_t mcycle throw std::runtime_error{"microarchitecture is not reset"}; } + // A call that can execute instructions may end in a rejected manual yield, and a call on a + // machine already in that state must emit the period of the reverted machine. Both need the + // revert uarch tail, which is checked here, before anything executes, so a failed call + // leaves the machine unchanged and can be retried with the tail in hand. A call starting at + // any other fixed point can only perform a no-op mcycle that cannot reject, so it never + // consumes the tail. 
+ const state_access sa(*this); + const bool start_rejected = is_rejected_manual_yield(sa); + const bool start_at_fixed_point = + read_reg(reg::iflags_H) != 0 || read_reg(reg::iflags_Y) != 0 || mcycle_start == UINT64_MAX; + if (start_rejected || !start_at_fixed_point) { + if (revert_uarch_tail.empty()) { + throw std::runtime_error{"revert uarch tail is required"}; + } + if (revert_uarch_tail.size() < 2) { + throw std::runtime_error{"revert uarch tail is too short"}; + } + if (revert_uarch_tail.back() != read_revert_root_hash()) { + throw std::runtime_error{"revert uarch tail does not end with the revert root hash"}; + } + } + // If the collection loop does not advance mcycle, set the break reason to indicate the target mcycle was reached uarch_cycle_root_hashes result; result.break_reason = interpreter_break_reason::reached_target_mcycle; @@ -2289,15 +2472,26 @@ uarch_cycle_root_hashes machine::collect_uarch_cycle_root_hashes(uint64_t mcycle // Initialize back tree back_merkle_tree back_tree(log2_bundle_uarch_cycle_count, m_c.hash_tree.hash_function); + // When the machine has already rejected an input, the canonical timeline continues from the + // reverted machine, so the result is its period, taken from the revert uarch tail, and the + // machine is left untouched + if (start_rejected) { + append_revert_uarch_tail_period(result, back_tree, log2_bundle_uarch_cycle_count, revert_uarch_tail); + result.break_reason = interpreter_break_reason::yielded_manually; + assert(back_tree.empty()); + return result; + } + hash_tree::dirty_words_type reset_dirty_words; collect_uarch_cycle_hashes_state_access::context context{}; const collect_uarch_cycle_hashes_state_access a(context, *this); // Reserve space before entering the loop to minimize dynamic memory allocations, - // the reserved sizes below are based on empirical benchmarks to balance performance and memory usage + // the reserved sizes below are based on empirical benchmarks to balance performance and memory usage, 
+ // and are clamped to avoid over-allocation const uint64_t mcycle_count = mcycle_end - mcycle_start; - result.hashes.reserve(mcycle_count * 512); - result.reset_indices.reserve(mcycle_count); + result.hashes.reserve(std::clamp(mcycle_count * 512, 1, 16384)); + result.reset_indices.reserve(std::clamp(mcycle_count, 1, 16384)); context.dirty_words.reserve(8); reset_dirty_words.reserve(64); @@ -2306,8 +2500,8 @@ uarch_cycle_root_hashes machine::collect_uarch_cycle_root_hashes(uint64_t mcycle uint64_t mcycle_target = saturating_add(mcycle_start, UINT64_C(1), mcycle_end); uint64_t mcycle_reached = read_reg(reg::mcycle); - // In case we start at fixed point, we will attempt to execute one extra mcycle which - bool at_fixed_point = read_reg(reg::iflags_H) != 0 || read_reg(reg::iflags_Y) != 0 || mcycle_start == UINT64_MAX; + // In case we start at fixed point, we will attempt to execute one extra mcycle + bool at_fixed_point = start_at_fixed_point; if (at_fixed_point) { mcycle_target = mcycle_reached; } @@ -2354,13 +2548,7 @@ uarch_cycle_root_hashes machine::collect_uarch_cycle_root_hashes(uint64_t mcycle assert(uarch_break_reason == uarch_interpreter_break_reason::reached_target_cycle); // Append root hash to the result - back_tree.push_back(m_ht.get_root_hash()); - - // When back tree is full, we can append the bundled root hash and reset it - if (back_tree.full()) { - result.hashes.emplace_back(back_tree.get_root_hash()); - back_tree.clear(); - } + append_uarch_cycle_root_hash(result, back_tree, m_ht.get_root_hash()); } // Sanity check to ensure the loop is working correctly, this should always be true @@ -2396,41 +2584,14 @@ uarch_cycle_root_hashes machine::collect_uarch_cycle_root_hashes(uint64_t mcycle throw std::runtime_error{"update hash tree failed"}; } reset_dirty_words.clear(); - const auto reset_root_hash = m_ht.get_root_hash(); - // Add one hash after the uarch reset - if (log2_bundle_uarch_cycle_count > 0) { - const auto halt_pad_hashes = 
back_merkle_tree::make_pad_hashes(halt_root_hash, - log2_bundle_uarch_cycle_count, m_c.hash_tree.hash_function); - - // Pad back tree when partially filled and then append its bundled root hash - if (!back_tree.empty()) { - assert(!back_tree.full()); - back_tree.pad_back(back_tree.get_remaining_leaf_count(), halt_pad_hashes); - result.hashes.emplace_back(back_tree.get_root_hash()); - back_tree.clear(); - } - - // Append bundled root hash containing only repetitions of the halt root hash - result.hashes.emplace_back(halt_pad_hashes[log2_bundle_uarch_cycle_count]); - - // Append bundled root hash containing repetitions of the halt root hash on the left - // and one reset root hash on the right - assert(back_tree.empty()); - back_tree.pad_back((1 << log2_bundle_uarch_cycle_count) - 1, halt_pad_hashes); - back_tree.push_back(reset_root_hash); - assert(back_tree.full()); - result.hashes.emplace_back(back_tree.get_root_hash()); - back_tree.clear(); + // When the machine has rejected an input, the reset folds in a revert, and the canonical + // root hash after it is the recorded revert root hash + const bool rejected = is_rejected_manual_yield(sa); + const auto reset_root_hash = rejected ? 
read_revert_root_hash() : m_ht.get_root_hash(); - // Add the index where reset happened - result.reset_indices.emplace_back(result.hashes.size() - 1); - } else { - result.hashes.push_back(reset_root_hash); - - // Add the index where reset happened - result.reset_indices.emplace_back(result.hashes.size() - 1); - } + // Add one hash after the uarch reset + append_uarch_reset_root_hash(result, back_tree, log2_bundle_uarch_cycle_count, halt_root_hash, reset_root_hash); mcycle_reached = read_reg(reg::mcycle); @@ -2447,6 +2608,14 @@ uarch_cycle_root_hashes machine::collect_uarch_cycle_root_hashes(uint64_t mcycle break; } + // When the machine has rejected an input, the canonical timeline continues from the + // reverted machine, so the period that follows comes from the revert uarch tail + // instead of the machine itself + if (rejected) { + append_revert_uarch_tail_period(result, back_tree, log2_bundle_uarch_cycle_count, revert_uarch_tail); + break; + } + // If we already attempted to advance one mcycle over a fixed point, we are done if (at_fixed_point) { break; diff --git a/src/machine.hpp b/src/machine.hpp index 7cabaa2d8..c158ae6a7 100644 --- a/src/machine.hpp +++ b/src/machine.hpp @@ -28,7 +28,6 @@ #include #include -#include "access-log.hpp" #include "address-range.hpp" #include "back-merkle-tree.hpp" #include "hash-tree-constants.hpp" @@ -264,34 +263,51 @@ class machine final { /// Stores into result.break_reason the reason why the function returned. /// \detail The first hash added to \p result.hashes is the root hash after the first uarch cycle, the last is the /// root hash at the time function returns (for whatever reason), which always happens right after an uarch reset. - uarch_cycle_root_hashes collect_uarch_cycle_root_hashes(uint64_t mcycle_end, int32_t log2_bundle_uarch_cycle_count); - - /// \brief Advances one micro step and returns a state access log. - /// \param log_type Type of access log to generate. - /// \returns The state access log. 
- access_log log_step_uarch(const access_log::type &log_type); + /// \param revert_uarch_tail Root hashes after each uarch cycle of the period of the machine the recorded + /// revert root hash reverts to, the last entry being the revert root hash itself (the reset entry of that + /// period). It is obtained by calling this function with no bundling on that machine, while it waits for + /// a response. Required unless the machine starts at a fixed point other than a rejected manual yield, + /// in which case the call cannot consume it and ignores it. + /// When the machine ends in a manual yield whose reason is rx-rejected, the root hash after the final uarch + /// reset is substituted by the recorded revert root hash, and one extra period, that of the reverted machine + /// as given by \p revert_uarch_tail, is collected after it. + uarch_cycle_root_hashes collect_uarch_cycle_root_hashes(uint64_t mcycle_end, int32_t log2_bundle_uarch_cycle_count, + const machine_hashes &revert_uarch_tail = {}); + + /// \brief Runs the uarch for the given cycle count (or halt) and writes a binary step log to a file. + /// \param uarch_cycle_count Number of cycles to advance; the run stops earlier on halt or overflow. + /// \param filename Path where the binary step log will be saved. + /// \returns Reason the uarch step ended. + uarch_interpreter_break_reason log_step_uarch(uint64_t uarch_cycle_count, const std::string &filename); /// \brief Resets the entire uarch state to pristine values. void reset_uarch(); - /// \brief Resets the microarchitecture state and returns an access log - /// \param log_type Type of access log to generate. - /// \param log_data If true, access data is recorded in the log, otherwise only hashes. The default is false. - /// \returns The state access log. - access_log log_reset_uarch(const access_log::type &log_type); + /// \brief Resets the microarchitecture state and writes a binary step log to a file. 
+ /// \param filename Path where the binary step log will be saved. + void log_reset_uarch(const std::string &filename); /// \brief Checks the validity of a state transition caused by log_step_uarch. /// \param root_hash_before State hash before step. - /// \param log Step state access log. + /// \param filename Path to the binary step log file produced by log_step_uarch. + /// \param uarch_cycle_count Number of cycles the caller expects to have been advanced. /// \param root_hash_after State hash after step. - static void verify_step_uarch(const machine_hash &root_hash_before, const access_log &log, - const machine_hash &root_hash_after); + static void verify_step_uarch(const machine_hash &root_hash_before, const std::string &filename, + uint64_t uarch_cycle_count, const machine_hash &root_hash_after); + + /// \brief Replays a uarch step log and returns a human-readable printout. + /// \param filename Path to a binary step log file produced by log_step_uarch. + /// \returns The printout text; the caller decides where to write it. + /// \details Decodes and replays the log purely to produce the printout; no caller belief is + /// checked. The printout shows each uarch instruction bracketed by its mnemonic, with the reads + /// and writes it performs (and the old/new value of each write) nested underneath. + static std::string pretty_print_step_uarch(const std::string &filename); /// \brief Checks the validity of a state transition caused by log_reset_uarch. - /// \param root_hash_before State hash before uarch reset - /// \param log Step state access log. + /// \param root_hash_before State hash before uarch reset. + /// \param filename Path to the binary step log file produced by log_reset_uarch. /// \param root_hash_after State hash after uarch reset. 
- static void verify_reset_uarch(const machine_hash &root_hash_before, const access_log &log, + static void verify_reset_uarch(const machine_hash &root_hash_before, const std::string &filename, const machine_hash &root_hash_after); /// \brief Returns copy of default machine config @@ -396,6 +412,24 @@ class machine final { /// \returns The hash. machine_hash get_root_hash() const; + /// \brief Reads the revert root hash from the shadow state. + /// \returns The hash. + machine_hash read_revert_root_hash() const; + + /// \brief Writes the revert root hash to the shadow state. + /// \param hash Hash to store. + void write_revert_root_hash(const_machine_hash_view hash); + + /// \brief Checks that the machine can receive a cmio response with the given revert root hash. + /// \param revert_root_hash Machine root hash to revert to in case the response is eventually rejected. + /// \param reason Reason for sending the response. + /// \param length Length of response data. + /// \details Throws when the machine is not waiting on a manual yield or when the response data + /// does not fit in the rx buffer. For advance-state responses, also throws when the machine is + /// not waiting on an rx-accepted manual yield or when \p revert_root_hash differs from the + /// machine root hash. Called by the host send variant before any state changes. + void check_pending_cmio_request(const_machine_hash_view revert_root_hash, uint16_t reason, uint64_t length) const; + /// \brief Obtains the hash of a node in the hash-tree. /// \param address Address of target node. Must be aligned to a 2log2_size boundary. /// \param log2_size log2 of size subintended by target node. @@ -516,6 +550,10 @@ class machine final { return const_cast(std::as_const(*this).read_pma(index)); } + /// \brief Mark as dirty the page currently mapped by a write TLB slot, if any. + /// \param slot_index Index of the write TLB slot to inspect. 
+ void mark_write_tlb_dirty_page(uint64_t slot_index) const; + /// \brief Go over the write TLB and mark as dirty all pages currently there. void mark_write_tlb_dirty_pages() const; @@ -538,10 +576,13 @@ class machine final { } /// \brief Sends cmio response + /// \param revert_root_hash Root hash stored in the revert-root-hash shadow slot, to revert to if + /// the input delivered by this response is later rejected. /// \param reason Reason for sending response. /// \param data Response data. /// \param length Length of response data. - void send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length); + void send_cmio_response(const_machine_hash_view revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length); /// \brief Converts from machine host address to target physical address /// \param haddr Machine host address to convert @@ -562,15 +603,7 @@ class machine final { } /// \brief Marks a page as dirty - /// \param haddr Machine host address within page - /// \param pma_index Index of PMA where address falls - void mark_dirty_page(host_addr haddr, uint64_t pma_index) { - auto paddr = get_paddr(haddr, pma_index); - mark_dirty_page(paddr, pma_index); - } - - /// \brief Marks a page as dirty - /// \param paddr Target phyislcal address within page + /// \param paddr Target physical address within page /// \param pma_index Index of PMA where address falls void mark_dirty_page(uint64_t paddr, uint64_t pma_index) { auto &ar = read_pma(pma_index); @@ -585,6 +618,10 @@ class machine final { /// \param pma_index Index of PMA where address falls void write_verified_tlb(TLB_set_index set_index, uint64_t slot_index, uint64_t vaddr_page, host_addr vh_offset, uint64_t pma_index) { + // Mark the page currently mapped here dirty before we overwrite the slot + if (set_index == TLB_WRITE) { + mark_write_tlb_dirty_page(slot_index); + } m_s->penumbra.tlb[set_index][slot_index].vaddr_page = vaddr_page; 
m_s->penumbra.tlb[set_index][slot_index].vh_offset = vh_offset; m_s->shadow.tlb[set_index][slot_index].vaddr_page = vaddr_page; @@ -608,6 +645,10 @@ class machine final { if (slot_index >= TLB_SET_SIZE) { throw std::out_of_range{"TLB slot index out of bounds"}; } + // Mark the page currently mapped here dirty before we overwrite the slot + if (set_index == TLB_WRITE) { + mark_write_tlb_dirty_page(slot_index); + } m_s->penumbra.tlb[set_index][slot_index].vaddr_page = TLB_UNVERIFIED_PAGE; m_s->penumbra.tlb[set_index][slot_index].vh_offset = host_addr{0}; m_s->shadow.tlb[set_index][slot_index].vaddr_page = vaddr_page; @@ -642,24 +683,32 @@ class machine final { } } - /// \brief Sends cmio response and returns an access log + /// \brief Sends cmio response and writes a binary step log to a file. + /// \param revert_root_hash Root hash stored in the revert-root-hash shadow slot, to revert to if + /// the input delivered by this response is later rejected. /// \param reason Reason for sending response. /// \param data Response data. /// \param length Length of response data. - /// \param log_type Type of access log to generate. - /// \return The state access log. - access_log log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const access_log::type &log_type); + /// \param filename Path where the binary step log will be saved. + /// \details The logged operation cannot fail, so the honest party can always prove the resulting + /// state transition. It is a no-op that leaves the state unchanged when the machine is not waiting + /// on a manual yield, when an advance-state response finds the machine yielded with a reason other + /// than rx-accepted (e.g., it rejected an input or threw an exception), or when the response data + /// does not fit in the rx buffer. 
+ void log_send_cmio_response(const_machine_hash_view revert_root_hash, uint16_t reason, const unsigned char *data, + uint64_t length, const std::string &filename); /// \brief Checks the validity of state transitions caused by log_send_cmio_response. + /// \param revert_root_hash Root hash that was stored in the revert-root-hash shadow slot. /// \param reason Reason for sending response. /// \param data The response sent when the log was generated. - /// \param length Length of response + /// \param length Length of response. /// \param root_hash_before State hash before response was sent. - /// \param log Log containing the state accesses performed by the load operation + /// \param filename Path to the binary step log file produced by log_send_cmio_response. /// \param root_hash_after State hash after response was sent. - static void verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, - const machine_hash &root_hash_before, const access_log &log, const machine_hash &root_hash_after); + static void verify_send_cmio_response(const_machine_hash_view revert_root_hash, uint16_t reason, + const unsigned char *data, uint64_t length, const machine_hash &root_hash_before, const std::string &filename, + const machine_hash &root_hash_after); /// \brief Returns a description of what is at a given target physical address /// \param paddr Target physical address of interest diff --git a/src/record-send-cmio-state-access.hpp b/src/record-send-cmio-state-access.hpp deleted file mode 100644 index b731a35f8..000000000 --- a/src/record-send-cmio-state-access.hpp +++ /dev/null @@ -1,293 +0,0 @@ -// Copyright Cartesi and individual authors (see AUTHORS) -// SPDX-License-Identifier: LGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation, either version 3 of the License, or (at your option) any -// 
later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License along -// with this program (see COPYING). If not, see . -// - -#ifndef RECORD_SEND_CMIO_STATE_ACCESS_HPP -#define RECORD_SEND_CMIO_STATE_ACCESS_HPP - -/// \file -/// \brief State access implementation that records state accesses to an access log. - -#include -#include -#include -#include - -#include "access-log.hpp" -#include "assert-printf.hpp" -#include "hash-tree-constants.hpp" -#include "host-addr.hpp" -#include "i-accept-scoped-notes.hpp" -#include "i-hasher.hpp" -#include "i-state-access.hpp" -#include "machine-hash.hpp" -#include "machine-reg.hpp" -#include "machine.hpp" -#include "meta.hpp" -#include "processor-state.hpp" -#include "scoped-note.hpp" -#include "shadow-registers.hpp" -#include "variant-hasher.hpp" - -namespace cartesi { - -class record_send_cmio_state_access; - -// Type trait that should return the fast_addr type for a state access class -template <> -struct i_state_access_fast_addr { - using type = host_addr; -}; - -/// \class record_send_cmio_state_access -/// \details This records all state accesses that happen during the execution of -/// a machine::send_cmio_response() function call -class record_send_cmio_state_access : - public i_state_access, - public i_accept_scoped_notes { - // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) - machine &m_m; ///< Associated machine - access_log &m_log; ///< Pointer to access log - // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) - - template - static void get_hash(H &h, const access_data &data, machine_hash &hash) { - get_merkle_tree_hash(h, std::span{data.data(), data.size()}, HASH_TREE_WORD_SIZE, - hash); - } - -public: - 
/// \brief Constructor from machine state. - /// \param m Reference to machine state. - /// \param log Reference to access log. - explicit record_send_cmio_state_access(machine &m, access_log &log) : m_m(m), m_log(log) { - ; - } - -private: - /// \brief Logs a read access of a uint64_t word from the machine state. - /// \param paligned Physical address in the machine state, aligned to a 64-bit word. - /// \param text Textual description of the access. - void log_read(uint64_t paligned, const char *text) const { - static_assert(HASH_TREE_LOG2_WORD_SIZE >= log2_size_v, - "hash tree word size cannot be narrower than machine word"); - if ((paligned & (sizeof(uint64_t) - 1)) != 0) { - throw std::invalid_argument{"paligned is not aligned to word size"}; - } - const uint64_t pleaf_aligned = paligned & ~(HASH_TREE_WORD_SIZE - 1); - access a; - - // We can skip updating the hash tree while getting the proof because we assume that: - // 1) A full hash tree update was called at the beginning of machine::log_load_cmio_input() - // 2) We called update_hash_tree_page on all write accesses - const auto proof = m_m.get_proof(skip_hash_tree_update, pleaf_aligned, HASH_TREE_LOG2_WORD_SIZE); - // We just store the sibling hashes in the access because this is the only missing piece of data needed to - // reconstruct the proof - a.set_sibling_hashes(proof.get_sibling_hashes()); - - a.set_type(access_type::read); - a.set_address(paligned); - a.set_log2_size(log2_size_v); - // NOLINTBEGIN(bugprone-unchecked-optional-access) - // we log the leaf data at pleaf_aligned that contains the word at paligned - a.get_read().emplace(); - a.get_read().value().resize(HASH_TREE_WORD_SIZE); - // read the entire leaf where the word is located - m_m.read_memory(pleaf_aligned, a.get_read().value().data(), HASH_TREE_WORD_SIZE); - variant_hasher h(m_m.get_hash_function()); - get_hash(h, a.get_read().value(), a.get_read_hash()); - // NOLINTEND(bugprone-unchecked-optional-access) - 
m_log.push_access(std::move(a), text); - } - - /// \brief Logs a write access before it happens. - /// \param paligned Physical address of the word in the machine state (Must be aligned to a 64-bit word). - /// \param val Value to write. - /// \param text Textual description of the access. - void log_before_write(uint64_t paligned, uint64_t val, const char *text) const { - static_assert(HASH_TREE_LOG2_WORD_SIZE >= log2_size_v, - "hash tree word size must be at least as large as a machine word"); - if ((paligned & (sizeof(uint64_t) - 1)) != 0) { - throw std::invalid_argument{"paligned is not aligned to word size"}; - } - // address of the leaf that contains the word at paligned - const uint64_t pleaf_aligned = paligned & ~(HASH_TREE_WORD_SIZE - 1); - access a; - - // We can skip updating the hash tree while getting the proof because we assume that: - // 1) A full hash tree update was called at the beginning of machine::log_load_cmio_input() - // 2) We called update_hash_tree_page on all write accesses - const auto proof = m_m.get_proof(skip_hash_tree_update, pleaf_aligned, HASH_TREE_LOG2_WORD_SIZE); - // We just store the sibling hashes in the access because this is the only missing piece of data needed to - // reconstruct the proof - a.set_sibling_hashes(proof.get_sibling_hashes()); - - a.set_type(access_type::write); - a.set_address(paligned); - a.set_log2_size(log2_size_v); - // NOLINTBEGIN(bugprone-unchecked-optional-access) - // we log the entire leaf where the word is located - a.get_read().emplace(); - a.get_read().value().resize(HASH_TREE_WORD_SIZE); - m_m.read_memory(pleaf_aligned, a.get_read().value().data(), HASH_TREE_WORD_SIZE); - variant_hasher h(m_m.get_hash_function()); - get_hash(h, a.get_read().value(), a.get_read_hash()); - // the logged written data is the same as the read data, but with the word at paligned replaced by word - a.set_written(access_data(a.get_read().value())); // copy the read data - const int word_offset = static_cast(paligned - 
pleaf_aligned); // offset of word in leaf - replace_word_access_data(val, a.get_written().value(), word_offset); // replace the word - // compute the hash of the written data - a.get_written_hash().emplace(); - get_hash(h, a.get_written().value(), a.get_written_hash().value()); - // NOLINTEND(bugprone-unchecked-optional-access) - m_log.push_access(std::move(a), text); - } - - /// \brief Updates the hash tree after the modification of a word in the machine state. - /// \param paligned Physical address in the machine state, aligned to a 64-bit word. - void update_after_write(uint64_t paligned) const { - assert((paligned & (sizeof(uint64_t) - 1)) == 0); - [[maybe_unused]] const bool updated = m_m.update_hash_tree_page(paligned); - assert(updated); - } - - /// \brief Logs a write access before it happens, writes, and then update the hash tree. - /// \param paligned Physical address of the word in the machine state (Must be aligned to a 64-bit word). - /// \param dest Reference to value before writing. - /// \param val Value to write to \p dest. - /// \param text Textual description of the access. 
- void log_before_write_write_and_update(uint64_t paligned, uint64_t &dest, uint64_t val, const char *text) const { - assert((paligned & (sizeof(uint64_t) - 1)) == 0); - log_before_write(paligned, val, text); - dest = val; - update_after_write(paligned); - } - - void log_before_write_word_write_and_update(uint64_t paligned, bool &dest, bool val, const char *text) const { - auto dest64 = static_cast(dest); - log_before_write_write_and_update(paligned, dest64, static_cast(val), text); - dest = (dest64 != 0); - update_after_write(paligned); - } - - // ----- - // i_state_access interface implementation - // ----- - friend i_state_access; - - void do_write_iflags_Y(uint64_t val) const { - log_before_write_write_and_update(machine_reg_address(machine_reg::iflags_Y), - m_m.get_state().shadow.registers.iflags.Y, val, "iflags.Y"); - } - - uint64_t do_read_iflags_Y() const { - log_read(machine_reg_address(machine_reg::iflags_Y), "iflags.Y"); - return m_m.get_state().shadow.registers.iflags.Y; - } - - void do_write_htif_fromhost(uint64_t val) const { - log_before_write_write_and_update(machine_reg_address(machine_reg::htif_fromhost), - m_m.get_state().shadow.registers.htif.fromhost, val, "htif.fromhost"); - } - - void do_write_memory_with_padding(uint64_t paddr, const unsigned char *data, uint64_t data_length, - int write_length_log2_size) const { - if ((paddr & (HASH_TREE_WORD_SIZE - 1)) != 0) { - throw std::invalid_argument("paddr is not aligned to tree leaf size"); - } - if (data == nullptr) { - throw std::invalid_argument("data is null"); - } - const uint64_t write_length = static_cast(1) << write_length_log2_size; - if (write_length < data_length) { - throw std::invalid_argument("write_length is less than data_length"); - } - // We need to compute the hash of the existing data before writing - // Find the target address range - const auto &ar = m_m.find_address_range(paddr, write_length); - if (!ar.is_memory()) { - throw std::invalid_argument("address range not entirely 
in memory PMA"); - } - access a{}; - a.set_type(access_type::write); - a.set_address(paddr); - a.set_log2_size(write_length_log2_size); - // Always compute the proof, even if we are not logging it, because: - // (1) we always need to compute read_hash. - // (2) Depending on the log type, we may also need to compute the proof. - // (3) proof.target_hash is the value that we need for a.read_hash in (1). - auto proof = m_m.get_proof(paddr, write_length_log2_size); - // log hash and data before write - a.set_read_hash(proof.get_target_hash()); - if (m_log.get_log_type().has_large_data()) { - access_data &data = a.get_read().emplace(write_length); - memcpy(data.data(), ar.get_host_memory(), write_length); - } - - // We just store the sibling hashes in the access because this is the only missing piece of data needed to - // reconstruct the proof - a.set_sibling_hashes(proof.get_sibling_hashes()); - - // write data to memory - m_m.write_memory(paddr, data, data_length); - - if (write_length > data_length) { - m_m.fill_memory(paddr + data_length, 0, write_length - data_length); - } - // we have to update the hash tree after every write - m_m.update_hash_tree(); - - // log hash and written data - // NOLINTBEGIN(bugprone-unchecked-optional-access) - a.get_written_hash().emplace(); - variant_hasher h(m_m.get_hash_function()); - const auto offset = paddr - ar.get_start(); - get_merkle_tree_hash(h, - std::span{ar.get_host_memory() + offset, static_cast(write_length)}, - HASH_TREE_WORD_SIZE, a.get_written_hash().value()); - if (m_log.get_log_type().has_large_data()) { - access_data &data = a.get_written().emplace(write_length); - memcpy(data.data(), ar.get_host_memory() + offset, write_length); - } - // NOLINTEND(bugprone-unchecked-optional-access) - m_log.push_access(a, "cmio rx buffer"); - } - - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - constexpr const char *do_get_name() const { - return "record_send_cmio_state_access"; - } - - // ----- - // 
i_accept_scoped_notes interface implementation - // ----- - friend i_accept_scoped_notes; - - void do_push_begin_bracket(const char *text) const { - m_log.push_begin_bracket(text); - } - - void do_push_end_bracket(const char *text) const { - m_log.push_end_bracket(text); - } - - auto do_make_scoped_note(const char *text) const { - return scoped_note{*this, text}; - } -}; - -} // namespace cartesi - -#endif diff --git a/src/record-step-state-access.hpp b/src/record-step-state-access.hpp index 0b53a129a..5b43ede3b 100644 --- a/src/record-step-state-access.hpp +++ b/src/record-step-state-access.hpp @@ -17,6 +17,7 @@ #ifndef RECORD_STEP_STATE_ACCESS_HPP #define RECORD_STEP_STATE_ACCESS_HPP +#include #include #include #include @@ -30,6 +31,7 @@ #include "hash-tree-constants.hpp" #include "hash-tree.hpp" #include "host-addr.hpp" +#include "i-accept-dirty-pages.hpp" #include "i-accept-scoped-notes.hpp" #include "i-prefer-shadow-state.hpp" #include "i-state-access.hpp" @@ -43,6 +45,7 @@ #include "riscv-constants.hpp" #include "shadow-registers.hpp" #include "shadow-tlb.hpp" +#include "step-log.hpp" #include "unique-c-ptr.hpp" #include "variant-hasher.hpp" @@ -61,12 +64,14 @@ struct i_state_access_fast_addr { class record_step_state_access : public i_state_access, public i_accept_scoped_notes, + public i_accept_dirty_pages, public i_prefer_shadow_state { using page_data_type = std::array; using pages_type = std::map; using sibling_hashes_type = hash_tree::sibling_hashes_type; using page_indices_type = std::vector; + using nodes_type = std::map; public: struct context { @@ -77,15 +82,16 @@ class record_step_state_access : hash_function(hash_function) { ; } - std::string filename; ///< where to save the log - hash_function_type hash_function; ///< hash function type to use for the log - mutable pages_type touched_pages; ///< copy of all pages touched during execution + std::string filename; ///< Where to save the log + hash_function_type hash_function; ///< Hash function type 
to use for the log + mutable pages_type touched_pages; ///< Copy of all pages touched during execution + mutable nodes_type touched_nodes; ///< Subtrees touched during execution }; private: // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) - context &m_context; ///< context for the recording - machine &m_m; ///< reference to machine + context &m_context; ///< Context for the recording + machine &m_m; ///< Reference to machine // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) public: @@ -100,59 +106,49 @@ class record_step_state_access : } /// \brief Finish recording and save the log file - void finish(const machine_hash &root_hash_before, uint64_t mcycle_count, const machine_hash &root_hash_after) { - // get sibling hashes of all touched pages + void finish(const machine_hash &root_hash_before, uint64_t requested_cycle_count, + const machine_hash &root_hash_after) { + // Fill in hash_after for each recorded node (tree is fresh from the outer get_root_hash call). + for (auto &[address, entry] : m_context.touched_nodes) { + entry.hash_after = m_m.get_node_hash(address, static_cast(entry.log2_size), skip_hash_tree_update); + } auto sibling_hashes = get_sibling_hashes(); - uint64_t page_count = m_context.touched_pages.size(); - uint64_t sibling_count = sibling_hashes.size(); - - // Write log file. - // The log format is as follows: - // root_hash_before, mcycle_count, root_hash_after, - // hash_function, page_count, [(page_index, data, scratch_area), ...], sibling_count, [sibling_hash, ...] - // We store the page index, instead of the page address. 
- // Scratch area is used by the replay to store page hashes, which change during replay - // This is to work around the lack of dynamic memory allocation when replaying the log in microarchitectures + + const step_log_header header{ + .signature = STEP_LOG_SIGNATURE, + .root_hash_before = root_hash_before, + .requested_cycle_count = requested_cycle_count, + .root_hash_after = root_hash_after, + .hash_function = static_cast(m_context.hash_function), + .page_count = m_context.touched_pages.size(), + .node_count = m_context.touched_nodes.size(), + .sibling_count = sibling_hashes.size(), + }; auto fp = make_unique_fopen(m_context.filename.c_str(), "wb"); - // write root hash before, mcycle count, and root hash after - if (fwrite(root_hash_before.data(), root_hash_before.size(), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write root hash before to log file"); - } - if (fwrite(&mcycle_count, sizeof(mcycle_count), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write mcycle count to log file"); - } - if (fwrite(root_hash_after.data(), root_hash_after.size(), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write root hash after to log file"); + if (fwrite(&header, sizeof(header), 1, fp.get()) != 1) { + throw std::runtime_error("Could not write header to log file"); } - // write the hash function type so the hasher can be recreated by the replay - auto hash_function = static_cast(m_context.hash_function); - if (fwrite(&hash_function, sizeof(hash_function), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write hash function type to log file"); - } - if (fwrite(&page_count, sizeof(page_count), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write page count to log file"); - } - for (auto &[address, data] : m_context.touched_pages) { - const auto page_index = address >> HASH_TREE_LOG2_PAGE_SIZE; - if (fwrite(&page_index, sizeof(page_index), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write page index to 
log file"); - } - if (fwrite(data.data(), data.size(), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write page data to log file"); - } - static const machine_hash all_zeros{}; - if (fwrite(all_zeros.data(), sizeof(all_zeros), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write page hash scratch to log file"); + for (const auto &[address, data] : m_context.touched_pages) { + page_entry entry{ + .index = address >> HASH_TREE_LOG2_PAGE_SIZE, + .data = {}, + .hash = {}, // scratch; replayer fills this in from the data + }; + std::copy_n(data.data(), data.size(), entry.data); + if (fwrite(&entry, sizeof(entry), 1, fp.get()) != 1) { + throw std::runtime_error("Could not write page entry to log file"); } } - if (fwrite(&sibling_count, sizeof(sibling_count), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write sibling count to log file"); - } - for (auto &hash : sibling_hashes) { - if (fwrite(hash.data(), sizeof(hash), 1, fp.get()) != 1) { - throw std::runtime_error("Could not write sibling hash to log file"); + for (const auto &[_, node] : m_context.touched_nodes) { + if (fwrite(&node, sizeof(node), 1, fp.get()) != 1) { + throw std::runtime_error("Could not write node entry to log file"); } } + if (!sibling_hashes.empty() && + fwrite(sibling_hashes.data(), sizeof(machine_hash), sibling_hashes.size(), fp.get()) != + sibling_hashes.size()) { + throw std::runtime_error("Could not write sibling hashes to log file"); + } } private: @@ -165,46 +161,120 @@ class record_step_state_access : if (m_context.touched_pages.contains(page)) { return; // already saved } + // get first node with starting address > page or end of map + auto node_it = m_context.touched_nodes.upper_bound(page); + if (node_it != m_context.touched_nodes.begin()) { + const auto prev_node_it = std::prev(node_it); + const auto prev_node_end = prev_node_it->first + (UINT64_C(1) << prev_node_it->second.log2_size); + // Reject if the page falls inside a previously recorded 
node's range. + if (prev_node_end > page) { + throw std::runtime_error("page falls inside a recorded node's range"); + } + } auto [it, _] = m_context.touched_pages.emplace(page, page_data_type()); m_m.read_memory(page, it->second.data(), it->second.size()); } - /// \brief Get the sibling hashes of all touched pages + /// \brief Record that the subtree at (address, log2_size) is being touched. + /// \param address Subtree start address, must be aligned to 2^log2_size + /// \param log2_size Log2 of the subtree size. Must be > PAGE_SIZE and <= ROOT_SIZE. + /// \details Captures the subtree's current hash as hash_before. hash_after is + /// filled in during finish() once the machine's tree has been refreshed. + /// Rejects overlaps with existing nodes and enclosure of touched pages so + /// the "pages and nodes are pairwise disjoint" invariant holds at replay. + void touch_node(uint64_t address, int log2_size) const { + if (log2_size <= HASH_TREE_LOG2_PAGE_SIZE || log2_size > HASH_TREE_LOG2_ROOT_SIZE) { + throw std::runtime_error("node log2 size is out of range"); + } + const auto node_size = UINT64_C(1) << log2_size; + if ((address & (node_size - 1)) != 0) { + throw std::runtime_error("node address is not aligned to its size"); + } + const auto node_end = address + node_size; + // get first node with starting address >= address or end of map + auto next_node_it = m_context.touched_nodes.lower_bound(address); + // Reject if the next node starts inside this node's range. + if (next_node_it != m_context.touched_nodes.end() && next_node_it->first < node_end) { + throw std::runtime_error("node overlaps an existing node"); + } + if (next_node_it != m_context.touched_nodes.begin()) { + const auto prev_node_it = std::prev(next_node_it); + const auto prev_node_end = prev_node_it->first + (UINT64_C(1) << prev_node_it->second.log2_size); + // Reject if the previous node's range extends into this node's range. 
+ if (prev_node_end > address) { + throw std::runtime_error("node overlaps an existing node"); + } + } + // get first page with starting address >= address or end of map + auto next_page_it = m_context.touched_pages.lower_bound(address); + // Reject if any existing page lies inside the node's range. + if (next_page_it != m_context.touched_pages.end() && next_page_it->first < node_end) { + throw std::runtime_error("node would enclose an existing page"); + } + m_context.touched_nodes.emplace(address, + node_entry{ + .address = address, + .log2_size = static_cast(log2_size), + .hash_before = m_m.get_node_hash(address, log2_size, skip_hash_tree_update), + .hash_after = {}, // filled in by finish() after the outer get_root_hash() refreshes the tree + }); + } + + /// \brief Collect the sibling hashes needed to reconstruct the root hash from touched_pages and touched_nodes. + /// \details Walks the tree with three cursors (pages, nodes, siblings). + /// A subtree whose range exactly matches a recorded node is consumed as a node + /// (no sibling emitted). Subtrees with no touched content emit one sibling hash. + /// Recursion descends into subtrees that contain at least one touched page or node. 
sibling_hashes_type get_sibling_hashes() { sibling_hashes_type sibling_hashes{}; - // page address are converted to page indices, in order to avoid overflows page_indices_type page_indices{}; - // iterate in ascending order of page addresses (the container is ordered by key) for (const auto &[address, _] : m_context.touched_pages) { page_indices.push_back(address >> HASH_TREE_LOG2_PAGE_SIZE); } auto next_page_index = page_indices.cbegin(); + auto next_node_it = m_context.touched_nodes.cbegin(); get_sibling_hashes_impl(0, HASH_TREE_LOG2_ROOT_SIZE - HASH_TREE_LOG2_PAGE_SIZE, page_indices, next_page_index, - sibling_hashes); + next_node_it, sibling_hashes); if (next_page_index != page_indices.cend()) { throw std::runtime_error("get_sibling_hashes failed to consume all pages"); } + if (next_node_it != m_context.touched_nodes.cend()) { + throw std::runtime_error("get_sibling_hashes failed to consume all nodes"); + } return sibling_hashes; } - /// \brief Recursively get the sibling hashes of all touched pages - /// \param page_index index of 1st page in range - /// \param page_count_log2_size log2 of the number of pages in range - /// \param page_indices indices of all pages - /// \param next_page_index smallest page index not visited yet - /// \param sibling_hashes stores the collected sibling hashes during the recursion + /// \brief Recursively collect sibling hashes for the subtree rooted at page_index + /// \param page_index Index of the first page in the subtree + /// \param page_count_log2_size Log2 of the number of pages in the subtree + /// \param page_indices All touched page indices, sorted ascending + /// \param next_page_index Cursor into page_indices; advances past each page consumed during recursion + /// \param next_node_it Cursor into touched_nodes; advances past each node consumed during recursion + /// \param sibling_hashes Accumulates sibling hashes for untouched subtrees void get_sibling_hashes_impl(uint64_t page_index, int page_count_log2_size, 
page_indices_type &page_indices, - page_indices_type::const_iterator &next_page_index, sibling_hashes_type &sibling_hashes) { - auto page_count = UINT64_C(1) << page_count_log2_size; - if (next_page_index == page_indices.cend() || page_index + page_count <= *next_page_index) { - // we can skip the hash tree update, because a full update was done before the recording started - sibling_hashes.push_back(m_m.get_node_hash(page_index << HASH_TREE_LOG2_PAGE_SIZE, - page_count_log2_size + HASH_TREE_LOG2_PAGE_SIZE, skip_hash_tree_update)); + page_indices_type::const_iterator &next_page_index, nodes_type::const_iterator &next_node_it, + sibling_hashes_type &sibling_hashes) { + const auto subtree_start_addr = page_index << HASH_TREE_LOG2_PAGE_SIZE; + const auto subtree_log2_size = page_count_log2_size + HASH_TREE_LOG2_PAGE_SIZE; + const auto page_count = UINT64_C(1) << page_count_log2_size; + const auto subtree_end_page_index = page_index + page_count; + + // next unconsumed page / node is inside this subtree? 
+ const bool page_in = next_page_index != page_indices.cend() && *next_page_index < subtree_end_page_index; + // shift node address into page-index units to compare with subtree_end_page_index + const bool node_in = next_node_it != m_context.touched_nodes.cend() && + (next_node_it->first >> HASH_TREE_LOG2_PAGE_SIZE) < subtree_end_page_index; + + if (!page_in && !node_in) { + sibling_hashes.push_back(m_m.get_node_hash(subtree_start_addr, subtree_log2_size, skip_hash_tree_update)); + } else if (node_in && next_node_it->first == subtree_start_addr && + next_node_it->second.log2_size == static_cast(subtree_log2_size)) { + ++next_node_it; } else if (page_count_log2_size > 0) { - get_sibling_hashes_impl(page_index, page_count_log2_size - 1, page_indices, next_page_index, + get_sibling_hashes_impl(page_index, page_count_log2_size - 1, page_indices, next_page_index, next_node_it, sibling_hashes); get_sibling_hashes_impl(page_index + (UINT64_C(1) << (page_count_log2_size - 1)), page_count_log2_size - 1, - page_indices, next_page_index, sibling_hashes); + page_indices, next_page_index, next_node_it, sibling_hashes); } else { ++next_page_index; } @@ -351,9 +421,33 @@ class record_step_state_access : return m_m.get_host_addr(paddr, pma_index); } - void do_mark_dirty_page(host_addr haddr, uint64_t pma_index) const { - // this is a noop in replay_step_state_access, so we do nothing else - m_m.mark_dirty_page(haddr, pma_index); + /// \brief Record a cmio response write into the cmio rx buffer. + /// \param paddr Destination physical address; must be aligned to (1 << write_length_log2_size). + /// \param data Pointer to source bytes. + /// \param data_length Number of valid bytes at \p data. + /// \param write_length_log2_size Log2 of the full write length (data + zero padding). + /// \details Writes spanning more than a page are recorded as a single subtree + /// node (touch_node); writes fitting within a page fall back to page-level + /// recording (touch_page). 
In either case the actual memory write is delegated + /// to the machine, and the padding zeros are written via fill_memory. + void do_write_memory_with_padding(uint64_t paddr, const unsigned char *data, uint64_t data_length, + int write_length_log2_size) const { + if (data == nullptr) { + throw std::invalid_argument("data is null"); + } + const uint64_t write_length = UINT64_C(1) << write_length_log2_size; + if (write_length < data_length) { + throw std::invalid_argument("write_length is less than data_length"); + } + if (write_length_log2_size > HASH_TREE_LOG2_PAGE_SIZE) { + touch_node(paddr, write_length_log2_size); + } else { + touch_page(paddr); + } + m_m.write_memory(paddr, data, data_length); + if (write_length > data_length) { + m_m.fill_memory(paddr + data_length, 0, write_length - data_length); + } } bool do_putchar(uint8_t /*c*/) const { // NOLINT(readability-convert-member-functions-to-static) @@ -363,6 +457,15 @@ class record_step_state_access : constexpr const char *do_get_name() const { // NOLINT(readability-convert-member-functions-to-static) return "record_step_state_access"; } + + // ----- + // i_accept_dirty_pages interface implementation + // ----- + friend i_accept_dirty_pages; + + void do_mark_dirty_page(uint64_t paddr, uint64_t pma_index) const { + m_m.mark_dirty_page(paddr, pma_index); + } }; } // namespace cartesi diff --git a/src/rejected-manual-yield.hpp b/src/rejected-manual-yield.hpp new file mode 100644 index 000000000..1e656705d --- /dev/null +++ b/src/rejected-manual-yield.hpp @@ -0,0 +1,48 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. 
+// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef REJECTED_MANUAL_YIELD_HPP +#define REJECTED_MANUAL_YIELD_HPP + +/// \file +/// \brief Predicate for the manual-yield rejected machine state + +#include + +#include "htif-constants.hpp" + +namespace cartesi { + +/// \brief Tells if the machine is paused on a manual yield whose reason is rx-rejected. +/// \tparam STATE_ACCESS State access class. +/// \param a State accessor. +/// \returns True when a manual yield with reason rx-rejected is pending. +/// \details This is the state from which verifiers substitute the recorded revert root +/// hash for the machine root hash. The uarch-dialect equivalent lives in +/// uarch-reset-state.cpp, where it is translated to Solidity. 
+template +bool is_rejected_manual_yield(const STATE_ACCESS &a) { + if (a.read_iflags_Y() == 0) { + return false; + } + const uint64_t tohost = a.read_htif_tohost(); + return HTIF_DEV_FIELD(tohost) == HTIF_DEV_YIELD && HTIF_CMD_FIELD(tohost) == HTIF_YIELD_CMD_MANUAL && + HTIF_REASON_FIELD(tohost) == HTIF_YIELD_MANUAL_REASON_RX_REJECTED; +} + +} // namespace cartesi + +#endif diff --git a/src/replay-send-cmio-state-access.hpp b/src/replay-send-cmio-state-access.hpp deleted file mode 100644 index ad3580438..000000000 --- a/src/replay-send-cmio-state-access.hpp +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright Cartesi and individual authors (see AUTHORS) -// SPDX-License-Identifier: LGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation, either version 3 of the License, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License along -// with this program (see COPYING). If not, see . 
-// - -#ifndef REPLAY_SEND_CMIO_STATE_ACCESS_HPP -#define REPLAY_SEND_CMIO_STATE_ACCESS_HPP - -/// \file -/// \brief State access implementation that replays recorded state accesses - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "access-log.hpp" -#include "hash-tree-constants.hpp" -#include "i-hasher.hpp" -#include "i-state-access.hpp" -#include "machine-hash.hpp" -#include "machine-reg.hpp" -#include "meta.hpp" -#include "unique-c-ptr.hpp" -#include "variant-hasher.hpp" - -namespace cartesi { - -class replay_send_cmio_state_access; - -// Type trait that should return the fast_addr type for a state access class -template <> -struct i_state_access_fast_addr { - using type = uint64_t; -}; - -/// \brief Allows replaying a machine::send_cmio_response() from an access log. -class replay_send_cmio_state_access final : public i_state_access { -public: - struct context { - /// \brief Constructor replay_send_cmio_state_access context - /// \param log Access log to be replayed - /// \param initial_hash Initial root hash - context(const access_log &log, const machine_hash &initial_hash, hash_function_type hash_function) : - accesses(log.get_accesses()), - root_hash(initial_hash), - hash_function(hash_function) { - ; - } - const std::vector &accesses; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) - ///< Index of next access to ne consumed - unsigned int next_access{}; - ///< Root hash before next access - machine_hash root_hash; - ///< Hash function type used for the log - hash_function_type hash_function; - }; - -private: - context &m_context; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) - -public: - /// \brief Constructor from access log - /// \param context Context with access log and initial root hash - explicit replay_send_cmio_state_access(replay_send_cmio_state_access::context &context) : m_context{context} { - if (m_context.accesses.empty()) { - throw std::invalid_argument{"the access log has 
no accesses"}; - } - } - - machine_hash get_root_hash() const { - return m_context.root_hash; - } - - /// \brief Checks if access log was fully consumed after reset operation is finished - void finish() { - if (m_context.next_access != m_context.accesses.size()) { - throw std::invalid_argument{"access log was not fully consumed"}; - } - } - -private: - friend i_state_access; - - std::string access_to_report() const { - auto index = m_context.next_access + 1; - switch (index) { - case 1: - return "1st access"; - case 2: - return "2nd access"; - case 3: - return "3rd access"; - default: - return std::to_string(index) + "th access"; - } - } - - template - static void get_hash(H &h, const access_data &data, machine_hash &hash) { - get_merkle_tree_hash(h, std::span{data.data(), data.size()}, HASH_TREE_WORD_SIZE, hash); - } - - /// \brief Checks a logged read and advances log. - /// \param paligned Physical address in the machine state, - /// aligned to the access size. - /// \param log2_size Log2 of access size. - /// \param text Textual description of the access. - /// \returns Value read. 
- uint64_t check_read(uint64_t paligned, const char *text) const { - static_assert(HASH_TREE_LOG2_WORD_SIZE >= log2_size_v, - "Hash tree word size must be at least as large as a machine word"); - if ((paligned & (sizeof(uint64_t) - 1)) != 0) { - // This is never reached by coverage because replay only uses check_read to check iflags_Y - // LCOV_EXCL_START - throw std::invalid_argument{"address not aligned to word size"}; - // LCOV_EXCL_STOP - } - if (m_context.next_access >= m_context.accesses.size()) { - // This is never reached by coverage because replay checks one read and its the first access - // If we truncate before the read, there will be zero accesses and another error triggers first - // LCOV_EXCL_START - throw std::invalid_argument{"too few accesses in log"}; - // LCOV_EXCL_STOP - } - const auto &access = m_context.accesses[m_context.next_access]; - if (access.get_type() != access_type::read) { - throw std::invalid_argument{"expected " + access_to_report() + " to read " + text}; - } - if (access.get_address() != paligned) { - // This is never reached by coverage because we only use check_read to check iflags_Y - // LCOV_EXCL_START - std::ostringstream err; - err << "expected " << access_to_report() << " to read " << text << " address 0x" << std::hex << paligned - << "(" << std::dec << paligned << ")"; - throw std::invalid_argument{err.str()}; - // LCOV_EXCL_STOP - } - if (access.get_log2_size() != log2_size_v) { - throw std::invalid_argument{"expected " + access_to_report() + " to read 2^" + - std::to_string(HASH_TREE_LOG2_WORD_SIZE) + " bytes from " + text}; - } - if (!access.get_read().has_value()) { - throw std::invalid_argument{"missing read " + std::string(text) + " data at " + access_to_report()}; - } - // NOLINTBEGIN(bugprone-unchecked-optional-access) - const auto &read_data = access.get_read().value(); - if (read_data.size() != HASH_TREE_WORD_SIZE) { - throw std::invalid_argument{"expected read " + std::string(text) + " data to contain 2^" + - 
std::to_string(HASH_TREE_LOG2_WORD_SIZE) + " bytes at " + access_to_report()}; - } - variant_hasher h{m_context.hash_function}; - // check if logged read data hashes to the logged read hash - machine_hash computed_read_hash{}; - get_hash(h, read_data, computed_read_hash); - if (access.get_read_hash() != computed_read_hash) { - throw std::invalid_argument{"logged read data of " + std::string(text) + - " data does not hash to the logged read hash at " + access_to_report()}; - } - // NOLINTEND(bugprone-unchecked-optional-access) - // check proof - auto proof = access.make_proof(m_context.root_hash); - if (!proof.verify(h)) { - throw std::invalid_argument{"Mismatch in root hash of " + access_to_report()}; - } - m_context.next_access++; - const uint64_t pleaf_aligned = paligned & ~(HASH_TREE_WORD_SIZE - 1); - const int word_offset = static_cast(paligned - pleaf_aligned); - return get_word_access_data(read_data, word_offset); - } - - /// \brief Checks a logged word write and advances log. - /// \param paligned Physical address in the machine state, - /// aligned to a 64-bit word. - /// \param word Word value to write. - /// \param text Textual description of the access. 
- void check_write(uint64_t paligned, uint64_t word, const char *text) const { - static_assert(HASH_TREE_LOG2_WORD_SIZE >= log2_size_v, - "Hash tree word size must be at least as large as a machine word"); - if ((paligned & (sizeof(uint64_t) - 1)) != 0) { - throw std::invalid_argument{"paligned not aligned to word size"}; - } - if (m_context.next_access >= m_context.accesses.size()) { - throw std::invalid_argument{"too few accesses in log"}; - } - const auto &access = m_context.accesses[m_context.next_access]; - if (access.get_type() != access_type::write) { - throw std::invalid_argument{"expected " + access_to_report() + " to write " + text}; - } - if (access.get_address() != paligned) { - std::ostringstream err; - err << "expected " << access_to_report() << " to write " << text << " to address 0x" << std::hex << paligned - << "(" << std::dec << paligned << ")"; - throw std::invalid_argument{err.str()}; - } - if (access.get_log2_size() != log2_size_v) { - throw std::invalid_argument{"expected " + access_to_report() + " to write 2^" + - std::to_string(HASH_TREE_LOG2_WORD_SIZE) + " bytes to " + text}; - } - // NOLINTBEGIN(bugprone-unchecked-optional-access) - // check read - if (!access.get_read().has_value()) { - throw std::invalid_argument{"missing read " + std::string(text) + " data at " + access_to_report()}; - } - const auto &read_data = access.get_read().value(); - if (read_data.size() != HASH_TREE_WORD_SIZE) { - throw std::invalid_argument{"expected overwritten data from " + std::string(text) + " to contain 2^" + - std::to_string(access.get_log2_size()) + " bytes at " + access_to_report()}; - } - variant_hasher h{m_context.hash_function}; - // check if read data hashes to the logged read hash - machine_hash computed_read_hash{}; - get_hash(h, read_data, computed_read_hash); - if (access.get_read_hash() != computed_read_hash) { - throw std::invalid_argument{"logged read data of " + std::string(text) + - " does not hash to the logged read hash at " + 
access_to_report()}; - } - // check write - if (!access.get_written_hash().has_value()) { - throw std::invalid_argument{"missing written " + std::string(text) + " hash at " + access_to_report()}; - } - const auto &written_hash = access.get_written_hash().value(); - if (!access.get_written().has_value()) { - throw std::invalid_argument{"missing written " + std::string(text) + " data at " + access_to_report()}; - } - const auto &written_data = access.get_written().value(); - if (written_data.size() != read_data.size()) { - throw std::invalid_argument{"expected written " + std::string(text) + " data to contain 2^" + - std::to_string(access.get_log2_size()) + " bytes at " + access_to_report()}; - } - // check if written data hashes to the logged written hash - machine_hash computed_written_hash{}; - get_hash(h, written_data, computed_written_hash); - if (written_hash != computed_written_hash) { - throw std::invalid_argument{"logged written data of " + std::string(text) + - " does not hash to the logged written hash at " + access_to_report()}; - } - // check if word being written matches the logged data - const uint64_t pleaf_aligned = paligned & ~(HASH_TREE_WORD_SIZE - 1); - const int word_offset = static_cast(paligned - pleaf_aligned); - const uint64_t logged_word = get_word_access_data(written_data, word_offset); - if (word != logged_word) { - throw std::invalid_argument{"value being written to " + std::string(text) + - " does not match the logged written value at " + access_to_report()}; - } - // check if logged written data differs from the logged read data only by the written word - access_data expected_written_data(read_data); // make a copy of read data - replace_word_access_data(word, expected_written_data, word_offset); // patch with written word - if (written_data != expected_written_data) { - throw std::invalid_argument{"logged written data of " + std::string(text) + - " doesn't differ from the logged read data only by the written word at " + 
access_to_report()}; - } - // NOLINTEND(bugprone-unchecked-optional-access) - // check proof - auto proof = access.make_proof(m_context.root_hash); - if (!proof.verify(h)) { - throw std::invalid_argument{"Mismatch in root hash of " + access_to_report()}; - } - // Update root hash to reflect the data written by this access - m_context.root_hash = proof.bubble_up(h, written_hash); - m_context.next_access++; - } - - void do_write_iflags_Y(uint64_t val) const { - check_write(machine_reg_address(machine_reg::iflags_Y), val, "iflags.Y"); - } - - uint64_t do_read_iflags_Y() const { - return check_read(machine_reg_address(machine_reg::iflags_Y), "iflags.Y"); - } - - void do_write_htif_fromhost(uint64_t val) const { - check_write(machine_reg_address(machine_reg::htif_fromhost), val, "htif.fromhost"); - } - - void do_write_memory_with_padding(uint64_t paddr, const unsigned char *data, uint64_t data_length, - int write_length_log2_size) const { - variant_hasher h{m_context.hash_function}; - if (data == nullptr) { - throw std::invalid_argument("data is null"); - } - const uint64_t write_length = static_cast(1) << write_length_log2_size; - if (write_length < data_length) { - throw std::invalid_argument{"write length is less than data length"}; - } - const auto text = std::string("cmio rx buffer"); - if (m_context.next_access >= m_context.accesses.size()) { - throw std::invalid_argument{"too few accesses in log"}; - } - const auto &access = m_context.accesses[m_context.next_access]; - if (access.get_address() != paddr) { - throw std::invalid_argument{"expected address of " + access_to_report() + " to match address of " + text}; - } - if (access.get_log2_size() != write_length_log2_size) { - throw std::invalid_argument{"expected " + access_to_report() + " to write 2^" + - std::to_string(write_length_log2_size) + " bytes to " + text}; - } - if (access.get_type() != access_type::write) { - throw std::invalid_argument{"expected " + access_to_report() + " to write " + text}; - } - // 
NOLINTBEGIN(bugprone-unchecked-optional-access) - // if read data is available then its hash and the logged read hash must match - if (access.get_read().has_value()) { - machine_hash computed_logged_data_hash{}; - get_hash(h, access.get_read().value(), computed_logged_data_hash); - if (computed_logged_data_hash != access.get_read_hash()) { - throw std::invalid_argument{ - "hash of read data and read hash at " + access_to_report() + " does not match read hash"}; - } - } - if (!access.get_written_hash().has_value()) { - throw std::invalid_argument{"write " + access_to_report() + " has no written hash"}; - } - const auto &written_hash = access.get_written_hash().value(); - // compute hash of data argument padded with zeroes - machine_hash computed_data_hash{}; - auto scratch = make_unique_calloc(write_length, std::nothrow_t{}); - if (!scratch) { - throw std::runtime_error("failed to allocate scratch memory"); - } - memcpy(scratch.get(), data, data_length); - if (write_length > data_length) { - memset(scratch.get() + data_length, 0, write_length - data_length); - } - get_merkle_tree_hash(h, std::span{scratch.get(), static_cast(write_length)}, - HASH_TREE_WORD_SIZE, computed_data_hash); - // check if logged written hash matches the computed data hash - if (written_hash != computed_data_hash) { - throw std::invalid_argument{"logged written hash of " + text + - " does not match the hash of data argument at " + access_to_report()}; - } - if (access.get_written().has_value()) { - // if written data is available then its hash and the logged written hash must match - machine_hash computed_hash; - get_hash(h, access.get_written().value(), computed_hash); - if (computed_hash != written_hash) { - throw std::invalid_argument{"written hash and written data mismatch at " + access_to_report()}; - } - } - // NOLINTEND(bugprone-unchecked-optional-access) - // check proof - auto proof = access.make_proof(m_context.root_hash); - if (!proof.verify(h)) { - throw 
std::invalid_argument{"Mismatch in root hash of " + access_to_report()}; - } - // Update root hash to reflect the data written by this access - m_context.root_hash = proof.bubble_up(h, written_hash); - m_context.next_access++; - } - - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - constexpr const char *do_get_name() const { - return "replay_send_cmio_state_access"; - } -}; - -} // namespace cartesi - -#endif diff --git a/src/replay-step-state-access.hpp b/src/replay-step-state-access.hpp index bfbdce978..4327bb7d3 100644 --- a/src/replay-step-state-access.hpp +++ b/src/replay-step-state-access.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -38,53 +39,91 @@ #include "mock-address-range.hpp" #include "pmas-constants.hpp" #include "pmas.hpp" +#include "rejected-manual-yield.hpp" #include "riscv-constants.hpp" #include "shadow-registers.hpp" #include "shadow-tlb.hpp" +#include "step-log-layout.hpp" +#include "step-log.hpp" #include "strict-aliasing.hpp" #include "throw.hpp" -#include "variant-hasher.hpp" namespace cartesi { -using hash_type = unsigned char (*)[MACHINE_HASH_SIZE]; -using const_hash_type = const unsigned char (*)[MACHINE_HASH_SIZE]; - -#ifdef ZKARCHITECTURE - -extern "C" void zk_merkle_tree_hash(hash_function_type hash_function, const unsigned char *data, size_t size, - hash_type hash); - -extern "C" void zk_concat_hash(hash_function_type hash_function, const_hash_type left, const_hash_type right, - hash_type result); - -static void merkle_tree_hash(hash_function_type hash_function, const unsigned char *data, size_t size, hash_type hash) { - zk_merkle_tree_hash(hash_function, data, size, hash); -} - -static void concat_hash(hash_function_type hash_function, const_hash_type left, const_hash_type right, - hash_type result) { - zk_concat_hash(hash_function, left, right, result); +// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast,misc-no-recursion) + +/// Merkle hash of the subtree 
[start, start + 2^log2_size) of `data` (data_length +/// bytes, zero-padded beyond). A subtree entirely past the data collapses to the +/// precomputed pristine-zero hash for its level; a subtree entirely within falls +/// through to merkle_tree_hash; only the boundary subtree at each level recurses. +/// Mirrors HashTree.merkleSubtreeHashPadded in the Solidity replayer. Heap-free so it +/// builds in the risc0 guest. +static machine_hash merkle_subtree_hash_padded(hash_function_type hash_function, const unsigned char *data, + uint64_t data_length, uint64_t start, int log2_size, const machine_hash *pristine) { + constexpr int word_log2 = HASH_TREE_LOG2_WORD_SIZE; + constexpr uint64_t word_size = UINT64_C(1) << word_log2; + if (start >= data_length) { + return pristine[log2_size]; + } + machine_hash result{}; + if (log2_size == word_log2) { + // Leaf. One straddling the data boundary is zero-padded on the right, + // matching the rx-buffer post-state after the copy_n + fill_n write. + if (start + word_size <= data_length) { + merkle_tree_hash(hash_function, data + start, word_size, reinterpret_cast(&result)); + } else { + std::array buf{}; + std::memcpy(buf.data(), data + start, static_cast(data_length - start)); + merkle_tree_hash(hash_function, buf.data(), word_size, reinterpret_cast(&result)); + } + return result; + } + const uint64_t size = UINT64_C(1) << log2_size; + if (start + size <= data_length) { + merkle_tree_hash(hash_function, data + start, size, reinterpret_cast(&result)); + return result; + } + const uint64_t half = size >> 1; + const machine_hash left = + merkle_subtree_hash_padded(hash_function, data, data_length, start, log2_size - 1, pristine); + const machine_hash right = + merkle_subtree_hash_padded(hash_function, data, data_length, start + half, log2_size - 1, pristine); + concat_hash(hash_function, reinterpret_cast(&left), reinterpret_cast(&right), + reinterpret_cast(&result)); + return result; } -#else - -static void 
merkle_tree_hash(hash_function_type hash_function, const unsigned char *data, size_t size, hash_type hash) { - variant_hasher h{hash_function}; - get_merkle_tree_hash(h, std::span{data, size}, HASH_TREE_WORD_SIZE, - machine_hash_view{*hash, MACHINE_HASH_SIZE}); +/// Merkle hash of `data` (data_length bytes) zero-padded to 2^write_length_log2_size. +static void merkle_tree_hash_padded(hash_function_type hash_function, const unsigned char *data, uint64_t data_length, + int write_length_log2_size, hash_type hash) { + if (write_length_log2_size <= HASH_TREE_LOG2_WORD_SIZE || write_length_log2_size >= HASH_TREE_LOG2_ROOT_SIZE) { + THROW(std::invalid_argument, "write_length_log2_size out of range"); + } + const uint64_t write_length = UINT64_C(1) << write_length_log2_size; + if (data_length > write_length) { + THROW(std::invalid_argument, "data_length exceeds padded write length"); + } + if (data == nullptr && data_length != 0) { + THROW(std::invalid_argument, "data is null but data_length is non-zero"); + } + constexpr int word_log2 = HASH_TREE_LOG2_WORD_SIZE; + constexpr uint64_t word_size = UINT64_C(1) << word_log2; + // Pristine-zero hash per level, built from merkle_tree_hash + concat_hash alone + // (heap-free for the ZK guest). 
+ std::array pristine{}; + std::array zero_word{}; + merkle_tree_hash(hash_function, zero_word.data(), zero_word.size(), + reinterpret_cast(&pristine[word_log2])); + for (int k = word_log2 + 1; k <= write_length_log2_size; ++k) { + concat_hash(hash_function, reinterpret_cast(&pristine[k - 1]), + reinterpret_cast(&pristine[k - 1]), reinterpret_cast(&pristine[k])); + } + const machine_hash root = + merkle_subtree_hash_padded(hash_function, data, data_length, 0, write_length_log2_size, pristine.data()); + *reinterpret_cast(hash) = root; } -static void concat_hash(hash_function_type hash_function, const_hash_type left, const_hash_type right, - hash_type result) { - variant_hasher h{hash_function}; - // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) - get_concat_hash(h, *reinterpret_cast(left), *reinterpret_cast(right), - *reinterpret_cast(result)); - // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) -} - -#endif +// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast,misc-no-recursion) class replay_step_state_access; @@ -94,56 +133,21 @@ struct i_state_access_fast_addr { using type = host_addr; }; -// \brief checks if a buffer is large enough to hold a data block of N elements of size S starting at a given offset -// \param max The maximum offset allowed -// \param current The current offset -// \param elsize The size of each element -// \param elcount The number of elements -// \param next Receives the start offset of the next data block -// \return true if the buffer is large enough and data doesn't overflow, false otherwise -static inline bool validate_and_advance_offset(uint64_t max, uint64_t current, uint64_t elsize, uint64_t elcount, - uint64_t *next) { - uint64_t size{}; - if (__builtin_mul_overflow(elsize, elcount, &size)) { - return false; - } - if (__builtin_add_overflow(current, size, next)) { - return false; - } - return *next <= max; -} - // \brief Provides machine state from a step log file class replay_step_state_access : public 
i_state_access, public i_accept_scoped_notes, public i_prefer_shadow_state { public: - using address_type = uint64_t; - using data_type = unsigned char[AR_PAGE_SIZE]; - - struct PACKED page_type { - address_type index; - data_type data; - machine_hash hash; - }; - struct context { - machine_hash logged_root_hash_before{}; ///< Root hash before the step (from log header) - uint64_t logged_mcycle_count{0}; ///< Number of mcycles in the step (from log header) - machine_hash logged_root_hash_after{}; ///< Root hash after the step (from log header) - hash_function_type hash_function{hash_function_type::keccak256}; ///< Hash function used for the step log - uint64_t page_count{0}; ///< Number of pages in the step log - page_type *pages{nullptr}; ///< Array of page data - uint64_t sibling_count{0}; ///< Number of sibling hashes in the step log - machine_hash *sibling_hashes{nullptr}; ///< Array of sibling hashes - mock_address_ranges ars{}; ///< Array of address ranges - hot_tlb_state tlb{}; ///< Hot TLB cache for validated entries + step_log log; ///< Parsed step log (witnessed tree) + mock_address_ranges ars{}; ///< Array of address ranges + hot_tlb_state tlb{}; ///< Hot TLB cache for validated entries }; private: - context &m_context; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) - mutable page_type *m_shadow_regs_page{nullptr}; ///< cache shadow registers page + context &m_context; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) + mutable page_entry *m_shadow_regs_page{nullptr}; ///< cache shadow registers page public: // \brief Construct a replay_step_state_access object from a log image @@ -152,113 +156,7 @@ class replay_step_state_access : // \param log_size The size of the log data // \throw runtime_error if the initial root hash does not match or the log data is invalid replay_step_state_access(context &context, unsigned char *log_image, uint64_t log_size) : m_context(context) { - // relevant offsets in the log data - uint64_t 
mcycle_count_offset{}; - uint64_t root_hash_after_offset{}; - uint64_t hash_function_offset{}; - uint64_t page_count_offset{}; - uint64_t first_page_offset{}; - uint64_t first_sibling_offset{}; - uint64_t sibling_count_offset{}; - uint64_t end_offset{}; // end of the log data - // read root_hash_before - if (!validate_and_advance_offset(log_size, 0, sizeof(m_context.logged_root_hash_before), 1, - &mcycle_count_offset)) { - THROW(std::runtime_error, "root hash before past end of step log"); - } - std::copy_n(log_image, sizeof(m_context.logged_root_hash_before), m_context.logged_root_hash_before.data()); - // read mcycle_count - if (!validate_and_advance_offset(log_size, mcycle_count_offset, sizeof(m_context.logged_mcycle_count), 1, - &root_hash_after_offset)) { - THROW(std::runtime_error, "mcycle count past end of step log"); - } - m_context.logged_mcycle_count = aliased_aligned_read(log_image + mcycle_count_offset); - // read root_hash_after - if (!validate_and_advance_offset(log_size, root_hash_after_offset, sizeof(m_context.logged_root_hash_after), 1, - &hash_function_offset)) { - THROW(std::runtime_error, "root hash after past end of step log"); - } - std::copy_n(log_image + root_hash_after_offset, sizeof(m_context.logged_root_hash_after), - m_context.logged_root_hash_after.data()); - // hash function type - uint64_t temp_hash_function{}; - if (!validate_and_advance_offset(log_size, hash_function_offset, sizeof(temp_hash_function), 1, - &page_count_offset)) { - THROW(std::runtime_error, "hash function type past end of step log"); - } - temp_hash_function = aliased_aligned_read(log_image + hash_function_offset); - switch (temp_hash_function) { - case static_cast(hash_function_type::keccak256): - m_context.hash_function = hash_function_type::keccak256; - break; - case static_cast(hash_function_type::sha256): - m_context.hash_function = hash_function_type::sha256; - break; - default: - THROW(std::runtime_error, "invalid log format: unsupported hash function type"); 
- } - - // set page count - if (!validate_and_advance_offset(log_size, page_count_offset, sizeof(m_context.page_count), 1, - &first_page_offset)) { - THROW(std::runtime_error, "page count past end of step log"); - } - m_context.page_count = aliased_aligned_read(log_image + page_count_offset); - if (m_context.page_count == 0) { - THROW(std::runtime_error, "page count is zero"); - } - // set page data - if (!validate_and_advance_offset(log_size, first_page_offset, sizeof(page_type), m_context.page_count, - &sibling_count_offset)) { - THROW(std::runtime_error, "page data past end of step log"); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - m_context.pages = reinterpret_cast(log_image + first_page_offset); - - // set sibling count and hashes - if (!validate_and_advance_offset(log_size, sibling_count_offset, sizeof(m_context.sibling_count), 1, - &first_sibling_offset)) { - THROW(std::runtime_error, "sibling count past end of step log"); - } - m_context.sibling_count = aliased_aligned_read(log_image + sibling_count_offset); - - // set sibling hashes - if (!validate_and_advance_offset(log_size, first_sibling_offset, sizeof(machine_hash), m_context.sibling_count, - &end_offset)) { - THROW(std::runtime_error, "sibling hashes past end of step log"); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - m_context.sibling_hashes = reinterpret_cast(log_image + first_sibling_offset); - - // ensure that we read exactly the expected log size - if (end_offset != log_size) { - THROW(std::runtime_error, "extra data at end of step log"); - } - - // ensure that the page indexes are in increasing order - // and that the scratch hash area is all zeros - static const machine_hash all_zeros{}; - for (uint64_t i = 0; i < m_context.page_count; i++) { - if (i > 0 && m_context.pages[i - 1].index >= m_context.pages[i].index) { - THROW(std::runtime_error, "invalid log format: page index is not in increasing order"); - } - // In the current 
implementation, this check is unnecessary - // But we may in the future change the data field to point to independently allocated pages - // This would break the code that uses binary search to find the page based on the address of its data - // LCOV_EXCL_START - if (i > 0 && +m_context.pages[i - 1].data >= +m_context.pages[i].data) { - THROW(std::runtime_error, "invalid log format: page data is not in increasing order"); - } - // LCOV_EXCL_STOP - if (m_context.pages[i].hash != all_zeros) { - THROW(std::runtime_error, "invalid log format: page scratch hash area is not zero"); - } - } - // compute and check the machine root hash before the replay - auto computed_root_hash_before = compute_root_hash(); - if (computed_root_hash_before != m_context.logged_root_hash_before) { - THROW(std::runtime_error, "initial root hash mismatch"); - } + m_context.log = step_log::decode(log_image, log_size); // initialize hot TLB entries as unverified for (auto set_index : {TLB_CODE, TLB_READ, TLB_WRITE}) { for (uint64_t slot_index = 0; slot_index < TLB_SET_SIZE; ++slot_index) { @@ -269,52 +167,42 @@ class replay_step_state_access : } // \brief Finish the replay and check the final machine root hash + // \param revert_on_rejected_yield Whether a machine left paused on a manual rejected yield makes the + // recorded revert root hash the canonical post-operation hash. // \throw runtime_error if the final root hash does not match - void finish() { - // compute and check machine root hash after the replay - auto computed_final_root_hash = compute_root_hash(); - if (computed_final_root_hash != m_context.logged_root_hash_after) { + // \details A step or uarch reset that leaves the machine paused on a rejected input reverts: its + // canonical post-operation hash is the recorded revert root hash. 
A send_cmio_response is not a step; + // when it no-ops on an already-rejected machine the transition is the identity, so it never reverts + // and its post-operation hash is the recomputed machine root hash. + void finish(bool revert_on_rejected_yield = true) { + machine_hash expected_final_root_hash{}; + if (revert_on_rejected_yield && is_rejected_manual_yield(*this)) { + // Revert substitutes the recorded root instead of recomputing it; still assert no node was + // left unconsumed (compute_root_hash makes this assertion on the non-reverted path). + m_context.log.check_all_nodes_consumed(); + expected_final_root_hash = read_revert_root_hash(); + } else { + expected_final_root_hash = m_context.log.compute_root_hash(true); + } + if (expected_final_root_hash != m_context.log.root_hash_after) { THROW(std::runtime_error, "final root hash mismatch"); } } private: - /// \brief Try to find a page in the logged data by its physical address - /// \param paddr The physical address of the page - /// \return A pointer to the page_type structure if found, nullptr otherwise - page_type *try_find_page(uint64_t paddr_page) const { - const auto page_index = paddr_page >> AR_LOG2_PAGE_SIZE; - auto pages = std::ranges::views::counted(m_context.pages, static_cast(m_context.page_count)); - auto it = std::ranges::lower_bound(pages, page_index, std::ranges::less{}, - [](const auto &page) { return page.index; }); - if (it != pages.end() && it->index == page_index) { - return &(*it); - } - return nullptr; - } - /// \brief Try to find a page in the logged data by the host address of its data /// \param haddr Host address of page data - /// \return A pointer to the page_type structure if found, nullptr otherwise - page_type *try_find_page(host_addr haddr_page) const { - auto pages = std::ranges::views::counted(m_context.pages, static_cast(m_context.page_count)); - auto it = std::ranges::lower_bound(pages, haddr_page, std::ranges::less{}, + /// \return A pointer to the page_entry structure 
if found, nullptr otherwise + page_entry *try_find_page(host_addr haddr_page) const { + auto it = std::ranges::lower_bound(m_context.log.pages, haddr_page, std::ranges::less{}, [](const auto &page) { return cast_ptr_to_host_addr(page.data); }); - if (it != pages.end() && cast_ptr_to_host_addr(it->data) == haddr_page) { + if (it != m_context.log.pages.end() && cast_ptr_to_host_addr(it->data) == haddr_page) { return &(*it); } return nullptr; } - page_type *find_page(uint64_t paddr_page) const { - auto *page_log = try_find_page(paddr_page); - if (page_log == nullptr) { - THROW(std::runtime_error, "required page not found"); - } - return page_log; - } - - page_type *find_page(host_addr haddr_page) const { + page_entry *find_page(host_addr haddr_page) const { auto *page_log = try_find_page(haddr_page); // The only caller is do_write_tlb, which receives vh_offset from the interpreter's page walk. // The interpreter computes vh_offset from do_get_faddr, which already called find_page(uint64_t) @@ -328,6 +216,18 @@ class replay_step_state_access : return page_log; } + /// \brief Zero the cached scratch hash of the page containing \p haddr so compute_root_hash + /// rehashes it on the after-root pass. + /// \details A write through a raw host address only knows the address, not the page entry. Page + /// data pointers strictly ascend, so the containing page is the last one whose data is <= haddr. + /// \p haddr always falls inside a witnessed page (the interpreter resolved it via do_get_faddr, + /// which find_page'd it), so the lookup never steps before the first page. 
+ void invalidate_page_hash(host_addr haddr) const { + auto it = std::ranges::upper_bound(m_context.log.pages, haddr, std::ranges::less{}, + [](const auto &page) { return cast_ptr_to_host_addr(page.data); }); + (--it)->hash = machine_hash{}; + } + static_assert(sizeof(shadow_registers_state) <= AR_PAGE_SIZE, "shadow registers must fit in a single page"); host_addr get_shadow_reg_host_addr(shadow_registers_what what) const { @@ -335,86 +235,19 @@ class replay_step_state_access : const auto page = paddr & ~PAGE_OFFSET_MASK; const auto offset = paddr & PAGE_OFFSET_MASK; if (m_shadow_regs_page == nullptr) { - m_shadow_regs_page = find_page(page); + m_shadow_regs_page = m_context.log.find_page(page); } return cast_ptr_to_host_addr(m_shadow_regs_page->data) + offset; } - // \brief Compute the current machine root hash - machine_hash compute_root_hash() { - //??D Here we should only do this for dirty pages, right? - //??D Initially, all pages are dirty, because we don't know their hashes - //??D But in the end, we should only update those pages that we touched - //??D May improve performance when we are running this on ZK - for (uint64_t i = 0; i < m_context.page_count; i++) { - merkle_tree_hash(m_context.hash_function, m_context.pages[i].data, AR_PAGE_SIZE, - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - reinterpret_cast(&m_context.pages[i].hash)); - } - size_t next_page = 0; - size_t next_sibling = 0; - auto root_hash = - compute_root_hash_impl(0, HASH_TREE_LOG2_ROOT_SIZE - AR_LOG2_PAGE_SIZE, next_page, next_sibling); - // In the current implementation, recursion is guided by the pages, so they are always consumed - // So this can never happen. 
- if (next_page != m_context.page_count) { - THROW(std::runtime_error, "too many pages in log"); - } - if (next_sibling != m_context.sibling_count) { - THROW(std::runtime_error, "too many sibling hashes in log"); - } - return root_hash; - } - - // \brief Compute the root hash of a memory range recursively - // \param page_index Index of the first page in the range - // \param log2_page_count Log2 of the size of number of pages in the range - // \param next_page Index of the next page to be visited - // \param next_sibling Index of the next sibling hash to be visited - // \return Resulting root hash of the range - machine_hash compute_root_hash_impl(address_type page_index, int log2_page_count, size_t &next_page, - size_t &next_sibling) { - // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)) - auto page_count = UINT64_C(1) << log2_page_count; - if (next_page >= m_context.page_count || page_index + page_count <= m_context.pages[next_page].index) { - if (next_sibling >= m_context.sibling_count) { - THROW(std::runtime_error, "too few sibling hashes in log"); - } - return m_context.sibling_hashes[next_sibling++]; - } - if (log2_page_count > 0) { - auto left = compute_root_hash_impl(page_index, log2_page_count - 1, next_page, next_sibling); - const auto halfway_page_index = page_index + (page_count >> 1); - auto right = compute_root_hash_impl(halfway_page_index, log2_page_count - 1, next_page, next_sibling); - machine_hash hash{}; - concat_hash(m_context.hash_function, reinterpret_cast(&left), - reinterpret_cast(&right), reinterpret_cast(&hash)); - return hash; - } - if (m_context.pages[next_page].index == page_index) { - return m_context.pages[next_page++].hash; - } - // To reach here, we need all three conditions at leaf level (log2_page_count == 0): - // 1. Line 360 false: pages[next_page].index <= page_index - // 2. Line 366 false: log2_page_count == 0 (leaf) - // 3. 
Line 375 false: pages[next_page].index != page_index - // That requires pages[next_page].index < page_index. But with sorted pages consumed left-to-right, - // that can't happen -- the next unconsumed page always has index >= current leaf's page_index. - // LCOV_EXCL_START - if (next_sibling >= m_context.sibling_count) { - THROW(std::runtime_error, "too few sibling hashes in log"); - } - return m_context.sibling_hashes[next_sibling++]; - // LCOV_EXCL_STOP - // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)) - } - - uint64_t check_read_reg(shadow_registers_what what) const { + uint64_t read_shadow_reg(shadow_registers_what what) const { return aliased_aligned_read(get_shadow_reg_host_addr(what)); } - void check_write_reg(shadow_registers_what what, uint64_t val) const { - aliased_aligned_write(get_shadow_reg_host_addr(what), val); + void write_shadow_reg(shadow_registers_what what, uint64_t val) const { + const auto haddr = get_shadow_reg_host_addr(what); + m_shadow_regs_page->hash = machine_hash{}; // written page: rehash on the after-root pass + aliased_aligned_write(haddr, val); } uint64_t read_pmas_istart(uint64_t index) const { @@ -433,11 +266,19 @@ class replay_step_state_access : friend i_prefer_shadow_state; uint64_t do_read_shadow_register(shadow_registers_what what) const { - return check_read_reg(what); + return read_shadow_reg(what); } void do_write_shadow_register(shadow_registers_what what, uint64_t val) const { - check_write_reg(what, val); + write_shadow_reg(what, val); + } + + machine_hash do_read_revert_root_hash() const { + constexpr uint64_t paddr = AR_SHADOW_REVERT_ROOT_HASH_START; + const auto *page_log = m_context.log.find_page(paddr & ~PAGE_OFFSET_MASK); + machine_hash hash{}; + std::copy_n(page_log->data + (paddr & PAGE_OFFSET_MASK), hash.size(), hash.begin()); + return hash; } // ----- @@ -452,7 +293,7 @@ class replay_step_state_access : // This assumes the corresponding page has been touched // (replay_step_state_access makes sure 
of it for any address we try to convert) const auto paddr_page = paddr & ~PAGE_OFFSET_MASK; - auto *page_log = find_page(paddr_page); + auto *page_log = m_context.log.find_page(paddr_page); const auto offset = paddr & PAGE_OFFSET_MASK; return cast_ptr_to_host_addr(page_log->data) + offset; } @@ -502,11 +343,12 @@ class replay_step_state_access : template void do_write_memory_word(host_addr haddr, uint64_t /* pma_index */, T val) const { + invalidate_page_hash(haddr); aliased_aligned_write(haddr, val); } template - auto check_read_tlb(TLB_set_index set_index, uint64_t slot_index, shadow_tlb_what what) const { + auto read_tlb_field(TLB_set_index set_index, uint64_t slot_index, shadow_tlb_what what) const { const auto haddr = do_get_faddr(shadow_tlb_get_abs_addr(set_index, slot_index, what)); return aliased_aligned_read(haddr); } @@ -523,12 +365,13 @@ class replay_step_state_access : template uint64_t do_read_tlb_pma_index(uint64_t slot_index) const { - return check_read_tlb(SET, slot_index, shadow_tlb_what::pma_index); + return read_tlb_field(SET, slot_index, shadow_tlb_what::pma_index); } template - auto check_write_tlb(TLB_set_index set_index, uint64_t slot_index, shadow_tlb_what what, TYPE val) const { + auto write_tlb_field(TLB_set_index set_index, uint64_t slot_index, shadow_tlb_what what, TYPE val) const { const auto haddr = do_get_faddr(shadow_tlb_get_abs_addr(set_index, slot_index, what)); + invalidate_page_hash(haddr); aliased_aligned_write(haddr, val); } @@ -540,10 +383,10 @@ class replay_step_state_access : return hot_slot.vaddr_page; } // Read shadow entry from the log - const auto vaddr_page = check_read_tlb(SET, slot_index, shadow_tlb_what::vaddr_page); - const auto vp_offset = check_read_tlb(SET, slot_index, shadow_tlb_what::vp_offset); - const auto pma_index = check_read_tlb(SET, slot_index, shadow_tlb_what::pma_index); - const auto zero_padding = check_read_tlb(SET, slot_index, shadow_tlb_what::zero_padding_); + const auto vaddr_page = 
read_tlb_field(SET, slot_index, shadow_tlb_what::vaddr_page); + const auto vp_offset = read_tlb_field(SET, slot_index, shadow_tlb_what::vp_offset); + const auto pma_index = read_tlb_field(SET, slot_index, shadow_tlb_what::pma_index); + const auto zero_padding = read_tlb_field(SET, slot_index, shadow_tlb_what::zero_padding_); const auto &ar = do_read_pma(pma_index); if (shadow_tlb_verify_slot(vaddr_page, vp_offset, zero_padding, ar) == TLB_INVALID_PAGE) { hot_slot.vaddr_page = TLB_INVALID_PAGE; @@ -551,7 +394,7 @@ class replay_step_state_access : } // Find the target page in the log and compute vh_offset pointing into log data const auto paddr_page = vaddr_page + vp_offset; - const auto haddr_page = cast_ptr_to_host_addr(find_page(paddr_page)->data); + const auto haddr_page = cast_ptr_to_host_addr(m_context.log.find_page(paddr_page)->data); const auto vh_offset = haddr_page - vaddr_page; // Verification passed -- promote to hot entry hot_slot.vaddr_page = vaddr_page; @@ -567,16 +410,16 @@ class replay_step_state_access : template void do_write_tlb(uint64_t slot_index, uint64_t vaddr_page, host_addr vh_offset, uint64_t pma_index) const { assert(vaddr_page != TLB_UNVERIFIED_PAGE); - check_write_tlb(SET, slot_index, shadow_tlb_what::vaddr_page, vaddr_page); + write_tlb_field(SET, slot_index, shadow_tlb_what::vaddr_page, vaddr_page); if (vaddr_page != TLB_INVALID_PAGE) { // Convert vh_offset to vp_offset for the log (shadow stores vp_offset) const auto paddr_page = find_page(vaddr_page + vh_offset)->index << AR_LOG2_PAGE_SIZE; - check_write_tlb(SET, slot_index, shadow_tlb_what::vp_offset, paddr_page - vaddr_page); + write_tlb_field(SET, slot_index, shadow_tlb_what::vp_offset, paddr_page - vaddr_page); } else { - check_write_tlb(SET, slot_index, shadow_tlb_what::vp_offset, static_cast(vh_offset)); + write_tlb_field(SET, slot_index, shadow_tlb_what::vp_offset, static_cast(vh_offset)); } - check_write_tlb(SET, slot_index, shadow_tlb_what::pma_index, pma_index); - 
check_write_tlb(SET, slot_index, shadow_tlb_what::zero_padding_, UINT64_C(0)); + write_tlb_field(SET, slot_index, shadow_tlb_what::pma_index, pma_index); + write_tlb_field(SET, slot_index, shadow_tlb_what::zero_padding_, UINT64_C(0)); // Mark hot entry as unverified so next access re-validates from the log m_context.tlb[SET][slot_index].vaddr_page = TLB_UNVERIFIED_PAGE; m_context.tlb[SET][slot_index].vh_offset = host_addr{0}; @@ -588,8 +431,48 @@ class replay_step_state_access : } // LCOV_EXCL_STOP - void do_mark_dirty_page(host_addr /* haddr */, uint64_t /* pma_index */) const { - // this is a noop since we have no host machine + /// \brief Verify a padded memory write recorded in the log. + /// \param paddr Destination physical address; must be aligned to (1 << write_length_log2_size). + /// \param data Pointer to source bytes. + /// \param data_length Number of valid bytes at \p data. + /// \param write_length_log2_size Log2 of the full write length (data + padding). + /// \details Supra-page writes hash (data || zero padding) and compare to the logged + /// node's hash_after. Sub-page writes mutate the logged page in place so its new hash + /// feeds the finish()-time root reconstruction. + void do_write_memory_with_padding(uint64_t paddr, const unsigned char *data, uint64_t data_length, + int write_length_log2_size) const { + if (data == nullptr) { + THROW(std::invalid_argument, "data is null"); + } + const uint64_t write_length = UINT64_C(1) << write_length_log2_size; + if (write_length < data_length) { + THROW(std::invalid_argument, "write length is less than data length"); + } + if (write_length_log2_size > HASH_TREE_LOG2_PAGE_SIZE) { + // Supra-page: hash data + zero-pad via pristine-streaming and compare to the logged node. 
+ const auto *node = m_context.log.try_find_node(paddr); + if (node == nullptr || node->log2_size != static_cast(write_length_log2_size)) { + THROW(std::runtime_error, "write_memory_with_padding node not found in log"); + } + machine_hash data_hash{}; + merkle_tree_hash_padded(m_context.log.hash_function, data, data_length, write_length_log2_size, + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast(&data_hash)); + if (node->hash_after != data_hash) { + THROW(std::runtime_error, "write_memory_with_padding does not match logged hash"); + } + m_context.log.consumed_node_count++; + return; + } + // Sub-page: mutate the logged page in place so its new hash flows into root reconstruction. + const uint64_t paddr_page = paddr & ~PAGE_OFFSET_MASK; + auto *page_log = m_context.log.find_page(paddr_page); + page_log->hash = machine_hash{}; + const uint64_t offset = paddr & PAGE_OFFSET_MASK; + std::copy_n(data, data_length, page_log->data + offset); + if (write_length > data_length) { + std::fill_n(page_log->data + offset + data_length, write_length - data_length, 0); + } } // NOLINTNEXTLINE(readability-convert-member-functions-to-static) diff --git a/src/send-cmio-response.cpp b/src/send-cmio-response.cpp index 29933bdaa..3f528dd31 100644 --- a/src/send-cmio-response.cpp +++ b/src/send-cmio-response.cpp @@ -21,32 +21,53 @@ #include "address-range-constants.hpp" #include "hash-tree-constants.hpp" -#include "record-send-cmio-state-access.hpp" // IWYU pragma: keep -#include "replay-send-cmio-state-access.hpp" // IWYU pragma: keep -#include "state-access.hpp" // IWYU pragma: keep +#include "htif-constants.hpp" +#include "record-step-state-access.hpp" // IWYU pragma: keep +#include "replay-step-state-access.hpp" // IWYU pragma: keep +#include "state-access.hpp" // IWYU pragma: keep #include "uarch-solidity-compat.hpp" // NOLINTBEGIN(google-readability-casting,misc-const-correctness,modernize-use-auto,hicpp-use-auto,readability-use-std-min-max) 
namespace cartesi { template -void send_cmio_response(STATE_ACCESS a, uint16 reason, bytes data, uint32 dataLength) { +void send_cmio_response(STATE_ACCESS a, bytes32 revertRootHash, uint16 reason, bytes data, uint32 dataLength) { + // This function cannot fail. When a failure is detected, the operation is a no-op instead, + // so the honest party can always log and prove the resulting state transition. + // A response to a machine that is not waiting on a manual yield is a no-op. if (!readIflagsY(a)) { - throwRuntimeError(a, "iflags.Y is not set"); + return; + } + if (reason == HTIF_YIELD_REASON_ADVANCE_STATE) { + // Advance-state responses are the input boundary of the rollups flow. They only apply to a + // machine waiting for an input on an rx-accepted manual yield. Sending one to a machine that + // yielded manual with any other reason (e.g., rejected an input or threw an exception) is a no-op. + uint64 tohost = readHtifTohost(a); + if (!isYieldedManualWith(tohost, HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED)) { + return; + } } // A zero length data is a valid response. We just skip writing to the rx buffer. + uint32 writeLengthLog2Size = 0; if (dataLength > 0) { // Find the write length: the smallest power of 2 that is >= dataLength and >= tree leaf size - uint32 writeLengthLog2Size = uint32Log2(dataLength); + writeLengthLog2Size = uint32Log2(dataLength); if (writeLengthLog2Size < HASH_TREE_LOG2_WORD_SIZE) { writeLengthLog2Size = HASH_TREE_LOG2_WORD_SIZE; // minimum write size is the tree leaf size } if (uint32ShiftLeft(1, writeLengthLog2Size) < dataLength) { writeLengthLog2Size += 1; } + // A response with data that does not fit in the rx buffer is a no-op if (writeLengthLog2Size > AR_CMIO_RX_BUFFER_LOG2_SIZE) { - throwRuntimeError(a, "CMIO response data is too large"); + return; } + } + // Record the machine root hash to revert to in case the response is eventually rejected. 
A consumer + // recovers it from the uarch-reset step log (whose reset accesses this slot) to revert to this state + // if the response is later rejected. + writeRevertRootHash(a, revertRootHash); + if (dataLength > 0) { writeMemoryWithPadding(a, AR_CMIO_RX_BUFFER_START, data, dataLength, writeLengthLog2Size); } // Write data length and reason to fromhost @@ -59,15 +80,16 @@ void send_cmio_response(STATE_ACCESS a, uint16 reason, bytes data, uint32 dataLe } // Explicit instantiation for state_access -template void send_cmio_response(state_access a, uint16_t reason, const unsigned char *data, uint32 length); - -// Explicit instantiation for record_state_access -template void send_cmio_response(record_send_cmio_state_access a, uint16_t reason, const unsigned char *data, +template void send_cmio_response(state_access a, bytes32 revertRootHash, uint16_t reason, const unsigned char *data, uint32 length); -// Explicit instantiation for replay_state_access -template void send_cmio_response(replay_send_cmio_state_access a, uint16_t reason, const unsigned char *data, - uint32 length); +// Explicit instantiation for record_step_state_access +template void send_cmio_response(record_step_state_access a, bytes32 revertRootHash, uint16_t reason, + const unsigned char *data, uint32 length); + +// Explicit instantiation for replay_step_state_access +template void send_cmio_response(replay_step_state_access a, bytes32 revertRootHash, uint16_t reason, + const unsigned char *data, uint32 length); } // namespace cartesi // NOLINTEND(google-readability-casting,misc-const-correctness,modernize-use-auto,hicpp-use-auto,readability-use-std-min-max) diff --git a/src/send-cmio-response.hpp b/src/send-cmio-response.hpp index bb5117684..c786a2d04 100644 --- a/src/send-cmio-response.hpp +++ b/src/send-cmio-response.hpp @@ -19,32 +19,34 @@ #include +#include "machine-hash.hpp" + namespace cartesi { /// \brief Sends cmio response /// \tparam STATE_ACCESS State accessor type /// \param a State 
accessor +/// \param revertRootHash Machine root hash to revert to in case the response is eventually rejected /// \param reason Reason for sending the response /// \param data Response data /// \param length Response data length template -void send_cmio_response(STATE_ACCESS a, uint16_t reason, const unsigned char *data, uint32_t dataLength); +void send_cmio_response(STATE_ACCESS a, const_machine_hash_view revertRootHash, uint16_t reason, + const unsigned char *data, uint32_t dataLength); class state_access; -class record_state_access; -class replay_state_access; +class record_step_state_access; +class replay_step_state_access; -// Declaration of explicit instantiation in module send_cmio_response.cpp -extern template void send_cmio_response(state_access a, uint16_t reason, const unsigned char *data, - uint32_t dataLength); +// Declaration of explicit instantiations in module send-cmio-response.cpp +extern template void send_cmio_response(state_access a, const_machine_hash_view revertRootHash, uint16_t reason, + const unsigned char *data, uint32_t dataLength); -// Declaration of explicit instantiation in module uarch-reset-state.cpp -extern template void send_cmio_response(record_state_access a, uint16_t reason, const unsigned char *data, - uint32_t dataLength); +extern template void send_cmio_response(record_step_state_access a, const_machine_hash_view revertRootHash, + uint16_t reason, const unsigned char *data, uint32_t dataLength); -// Declaration of explicit instantiation in module uarch-reset-state.cpp -extern template void send_cmio_response(replay_state_access a, uint16_t reason, const unsigned char *data, - uint32_t dataLength); +extern template void send_cmio_response(replay_step_state_access a, const_machine_hash_view revertRootHash, + uint16_t reason, const unsigned char *data, uint32_t dataLength); } // namespace cartesi diff --git a/src/state-access.hpp b/src/state-access.hpp index 23d82da87..2f8b5a7fb 100644 --- a/src/state-access.hpp +++ 
b/src/state-access.hpp @@ -29,6 +29,7 @@ #include "compiler-defines.hpp" #include "host-addr.hpp" #include "i-accept-counters.hpp" +#include "i-accept-dirty-pages.hpp" #include "i-accept-scoped-notes.hpp" #include "i-interactive-state-access.hpp" #include "i-state-access.hpp" @@ -56,7 +57,8 @@ class state_access : public i_state_access, public i_interactive_state_access, public i_accept_scoped_notes, - public i_accept_counters { + public i_accept_counters, + public i_accept_dirty_pages { // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) //??(edubart): Storing reference to the processor state removes an extra indirection when accessing registers, @@ -450,6 +452,10 @@ class state_access : } } + machine_hash do_read_revert_root_hash() const { + return m_s.shadow.revert_root_hash; + } + template void do_read_memory_word(host_addr haddr, uint64_t /* pma_index */, T *pval) const { *pval = aliased_aligned_read(haddr); @@ -494,10 +500,6 @@ class state_access : return m_m.get_host_addr(paddr, pma_index); } - void do_mark_dirty_page(host_addr haddr, uint64_t pma_index) const { - m_m.mark_dirty_page(haddr, pma_index); - } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) bool do_putchar(uint8_t c) const { return m_m.putchar(c); @@ -542,6 +544,15 @@ class state_access : void do_write_counter(uint64_t val, const char *name, const char *domain) const { m_m.write_counter(val, name, domain); } + + // ----- + // i_accept_dirty_pages interface implementation + // ----- + friend i_accept_dirty_pages; + + void do_mark_dirty_page(uint64_t paddr, uint64_t pma_index) const { + m_m.mark_dirty_page(paddr, pma_index); + } }; } // namespace cartesi diff --git a/src/step-log-layout.hpp b/src/step-log-layout.hpp new file mode 100644 index 000000000..5aaa93a7e --- /dev/null +++ b/src/step-log-layout.hpp @@ -0,0 +1,92 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free 
software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef STEP_LOG_LAYOUT_HPP +#define STEP_LOG_LAYOUT_HPP + +/// \file +/// \brief On-disk layout of a binary step log file. +/// +/// A step log is a fixed-size header followed by three variable-length arrays: +/// +/// step_log_header header; +/// page_entry pages[header.page_count]; +/// node_entry nodes[header.node_count]; +/// machine_hash siblings[header.sibling_count]; +/// +/// All multi-byte integer fields use host (little-endian) byte order; there is no endian marker. + +#include +#include + +#include "address-range-constants.hpp" +#include "compiler-defines.hpp" +#include "hash-tree-constants.hpp" +#include "machine-hash.hpp" + +namespace cartesi { + +/// \brief Signature at the start of a binary step log file: magic + version + reserved. +/// Version byte is 3: all array counts live in the header. +constexpr std::array STEP_LOG_SIGNATURE = {'C', 'T', 'S', 'I', 3, 0, 0, 0}; + +/// \brief Fixed-size prefix of a step log file. 
+struct PACKED step_log_header { + std::array signature; ///< STEP_LOG_SIGNATURE (magic + version + reserved) + machine_hash root_hash_before; ///< Machine root hash before the step + uint64_t requested_cycle_count; ///< Cycle count requested by the caller: mcycle delta for log_step, + ///< uarch_cycle delta for log_step_uarch, 0 for log_reset_uarch and + ///< log_send_cmio_response (operations without a cycle request). + machine_hash root_hash_after; ///< Machine root hash after the step + uint64_t hash_function; ///< Value of hash_function_type used to hash the log + uint64_t page_count; ///< Number of entries in the pages array + uint64_t node_count; ///< Number of entries in the nodes array + uint64_t sibling_count; ///< Number of entries in the siblings array +}; +static_assert(sizeof(step_log_header) == 112, "expected wire size of step_log_header is 112 bytes"); + +/// \brief One touched page of memory recorded in a step log. +/// \details The recorder writes \p hash zero-filled; the replayer fills it with +/// merkle_tree_hash(data) at verify time and uses it to reconstruct the root. +struct PACKED page_entry { + uint64_t index; ///< Page index (byte address >> log2_page_size) + unsigned char data[HASH_TREE_PAGE_SIZE]; ///< Page contents as of first touch + machine_hash hash; ///< Scratch slot; must be zero on disk +}; +static_assert(sizeof(page_entry) == 4136, "expected wire size of page_entry is 4136 bytes"); +// The page wire format is written by the recorder using HASH_TREE_* and reconstructed by +// the replayer using AR_*; the two must denote the same page geometry, or recorder and +// replayer would disagree on page-index mapping and page data length. 
+static_assert(HASH_TREE_LOG2_PAGE_SIZE == AR_LOG2_PAGE_SIZE, + "step-log page wire format requires the hash-tree and address-range page log2 sizes to match"); +static_assert(HASH_TREE_PAGE_SIZE == AR_PAGE_SIZE, + "step-log page_entry.data size must equal the AR_PAGE_SIZE used by the replayer"); + +/// \brief One subtree-write (bulk write spanning > 1 page) recorded in a step log. +/// \details hash_before / hash_after are the subtree hashes around the bulk write; the +/// replayer picks one depending on whether it is reconstructing root_hash_before or +/// root_hash_after. +struct PACKED node_entry { + uint64_t address; ///< Subtree start address; must be aligned to 2^log2_size + uint64_t log2_size; ///< log2 of the subtree size; must be > page-log2 and <= root-log2 + machine_hash hash_before; ///< Subtree hash captured before the bulk write + machine_hash hash_after; ///< Subtree hash after the bulk write +}; +static_assert(sizeof(node_entry) == 80, "expected wire size of node_entry is 80 bytes"); + +} // namespace cartesi + +#endif diff --git a/src/step-log-util.lua b/src/step-log-util.lua new file mode 100755 index 000000000..8d5dad3db --- /dev/null +++ b/src/step-log-util.lua @@ -0,0 +1,441 @@ +#!/usr/bin/env lua5.4 + +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: Apache-2.0 +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +local cartesi +local cartesi_util +do + local ok, mod = pcall(require, "cartesi") + if ok then cartesi = mod end + local ok_u, util = pcall(require, "cartesi.util") + if ok_u then cartesi_util = util end +end + +local HASH_SIZE = cartesi and cartesi.HASH_SIZE or 32 +local PAGE_LOG2_SIZE = cartesi and cartesi.HASH_TREE_LOG2_PAGE_SIZE or 12 +local PAGE_DATA_SIZE = 1 << PAGE_LOG2_SIZE +local STEP_LOG_SIGNATURE = cartesi and cartesi.STEP_LOG_SIGNATURE or "CTSI\x03\x00\x00\x00" +local SIGNATURE_SIZE = #STEP_LOG_SIGNATURE + +-- step_log_header layout (see src/step-log.hpp): +-- signature(8) + root_hash_before(32) + requested_cycle_count(8) + root_hash_after(32) +-- + hash_function(8) + page_count(8) + node_count(8) + sibling_count(8) +local HEADER_SIZE = SIGNATURE_SIZE + 2 * HASH_SIZE + 5 * 8 +local PAGE_ENTRY_SIZE = 8 + PAGE_DATA_SIZE + HASH_SIZE +local NODE_ENTRY_SIZE = 8 + 8 + 2 * HASH_SIZE +local SIBLING_SIZE = HASH_SIZE + +local HASH_FUNCTION_NAMES = { + [cartesi and cartesi.HASH_FUNCTION_KECCAK256 or 0] = "keccak256", + [cartesi and cartesi.HASH_FUNCTION_SHA256 or 1] = "sha256", +} + +local hexstring = (cartesi_util and cartesi_util.hexstring) + or function(s) + return (s:gsub(".", function(c) return string.format("%02x", string.byte(c)) end)) + end + +local REGIONS +local DRIVE_START +if cartesi then + REGIONS = { + { name = "shadow_state", start = cartesi.AR_SHADOW_STATE_START, length = cartesi.AR_SHADOW_STATE_LENGTH }, + { name = "shadow_tlb", start = cartesi.AR_SHADOW_TLB_START, length = cartesi.AR_SHADOW_TLB_LENGTH }, + { name = "shadow_pmas", start = cartesi.AR_PMAS_START, length = cartesi.AR_PMAS_LENGTH }, + { name = "shadow_uarch", start = cartesi.UARCH_SHADOW_START_ADDRESS, length = cartesi.UARCH_SHADOW_LENGTH }, + { name = "uarch_ram", start = cartesi.UARCH_RAM_START_ADDRESS, length = cartesi.UARCH_RAM_LENGTH }, + { name = "clint", start = cartesi.AR_CLINT_START, length = cartesi.AR_CLINT_LENGTH }, + { name = "htif", start = cartesi.AR_HTIF_START, 
length = cartesi.AR_HTIF_LENGTH }, + { name = "plic", start = cartesi.AR_PLIC_START, length = cartesi.AR_PLIC_LENGTH }, + { + name = "cmio_rx", + start = cartesi.AR_CMIO_RX_BUFFER_START, + length = 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE, + }, + { + name = "cmio_tx", + start = cartesi.AR_CMIO_TX_BUFFER_START, + length = 1 << cartesi.AR_CMIO_TX_BUFFER_LOG2_SIZE, + }, + { name = "dtb", start = cartesi.AR_DTB_START, length = cartesi.AR_DTB_LENGTH }, + { name = "ram", start = cartesi.AR_RAM_START, length = 0x100000000 }, + } + -- Flash drives extend open-ended above AR_DRIVE_START. + DRIVE_START = cartesi.AR_DRIVE_START +end + +local function classify_address(addr) + if not REGIONS then return "unknown" end + for _, r in ipairs(REGIONS) do + if addr >= r.start and addr < r.start + r.length then return r.name end + end + if DRIVE_START and addr >= DRIVE_START then return "flash" end + return "unknown" +end + +local function format_size(bytes) + if bytes < 1024 then return string.format("%d B", bytes) end + if bytes < 1024 * 1024 then return string.format("%.1f KB", bytes / 1024) end + return string.format("%.1f MB", bytes / (1024 * 1024)) +end + +local function hash_function_name(code) return HASH_FUNCTION_NAMES[code] or string.format("unknown(%d)", code) end + +local function parse_header(data) + assert(#data >= HEADER_SIZE, string.format("step log header too small (got %d bytes, need %d)", #data, HEADER_SIZE)) + assert(data:sub(1, SIGNATURE_SIZE) == STEP_LOG_SIGNATURE, "invalid step log signature") + local off = SIGNATURE_SIZE + 1 + local hdr = {} + hdr.root_hash_before = data:sub(off, off + HASH_SIZE - 1) + off = off + HASH_SIZE + hdr.requested_cycle_count = string.unpack(" = assert(io.open(path, "rb")) + local data = f:read(HEADER_SIZE) + assert(data, "step log is empty: " .. path) + local hdr = parse_header(data) + hdr.path = path + return hdr +end + +-- Reads page indices but skips page bodies + scratch hashes to keep memory bounded. 
+local function read_full(path) + local f = assert(io.open(path, "rb")) + local file_size = assert(f:seek("end")) + f:seek("set", 0) + + local hdr_bytes = f:read(HEADER_SIZE) + assert(hdr_bytes, "step log is empty: " .. path) + local info = parse_header(hdr_bytes) + info.path = path + info.file_size = file_size + + info.page_indices = {} + for i = 1, info.page_count do + local idx_bytes = f:read(8) + assert(idx_bytes and #idx_bytes == 8, string.format("step log truncated at page %d/%d", i, info.page_count)) + info.page_indices[i] = string.unpack(" 0 then + local region_counts = {} + for _, idx in ipairs(info.page_indices) do + local region = classify_address(idx << PAGE_LOG2_SIZE) + region_counts[region] = (region_counts[region] or 0) + 1 + end + local sorted = {} + for name, count in pairs(region_counts) do + sorted[#sorted + 1] = { name, count } + end + table.sort(sorted, function(a, b) return a[2] > b[2] end) + local parts = {} + for _, r in ipairs(sorted) do + parts[#parts + 1] = string.format("%s=%d", r[1], r[2]) + end + print(string.format(" page_regions: %s", table.concat(parts, " "))) + end +end + +local function print_stats_row(info) + print( + string.format( + "%-12d %-6d %-6d %-8d %-12s %s", + info.requested_cycle_count, + info.page_count, + info.node_count, + info.sibling_count, + format_size(info.file_size), + info.path + ) + ) +end + +local commands = {} + +function commands.info(args) + local path = assert(args[1], "usage: step-log-util.lua info ") + print_header(read_header(path)) +end + +commands["root-hash-before"] = function(args) + local path = assert(args[1], "usage: step-log-util.lua root-hash-before ") + io.write(hexstring(read_header(path).root_hash_before)) +end + +commands["root-hash-after"] = function(args) + local path = assert(args[1], "usage: step-log-util.lua root-hash-after ") + io.write(hexstring(read_header(path).root_hash_after)) +end + +commands["requested-cycle-count"] = function(args) + local path = assert(args[1], "usage: 
step-log-util.lua requested-cycle-count ") + io.write(tostring(read_header(path).requested_cycle_count)) +end + +commands.signature = function(args) + local path = assert(args[1], "usage: step-log-util.lua signature ") + local f = assert(io.open(path, "rb")) + local sig = f:read(SIGNATURE_SIZE) + if sig == STEP_LOG_SIGNATURE then + io.write("ok\n") + else + io.write( + string.format( + "mismatch: got %s, expected %s\n", + sig and hexstring(sig) or "", + hexstring(STEP_LOG_SIGNATURE) + ) + ) + os.exit(1) + end +end + +function commands.stats(args) + assert(args[1], "usage: step-log-util.lua stats [step-log ...]") + for i, path in ipairs(args) do + print_stats(read_full(path)) + if i < #args then print() end + end +end + +commands["batch-stats"] = function(args) + local dir = assert(args[1], "usage: step-log-util.lua batch-stats ") + local files = {} + local pipe = assert(io.popen(string.format('ls -1 "%s"/*.log 2>/dev/null', dir))) + for line in pipe:lines() do + files[#files + 1] = line + end + pipe:close() + assert(#files > 0, "no .log files found in " .. 
dir) + table.sort(files) + + print(string.format("%-12s %-6s %-6s %-8s %-12s %s", "cycles", "pages", "nodes", "siblings", "size", "path")) + print(string.rep("-", 80)) + + local total = { page = 0, node = 0, sibling = 0, size = 0 } + local max = { page = 0, node = 0, sibling = 0, size = 0 } + local max_path = { page = "", node = "", sibling = "", size = "" } + for _, path in ipairs(files) do + local ok, info = pcall(read_full, path) + if ok then + print_stats_row(info) + total.page = total.page + info.page_count + total.node = total.node + info.node_count + total.sibling = total.sibling + info.sibling_count + total.size = total.size + info.file_size + if info.page_count > max.page then + max.page = info.page_count + max_path.page = path + end + if info.node_count > max.node then + max.node = info.node_count + max_path.node = path + end + if info.sibling_count > max.sibling then + max.sibling = info.sibling_count + max_path.sibling = path + end + if info.file_size > max.size then + max.size = info.file_size + max_path.size = path + end + else + io.stderr:write(string.format("warning: skipping %s: %s\n", path, tostring(info))) + end + end + + print(string.rep("-", 80)) + print(string.format("Files: %d", #files)) + print( + string.format( + "Avg pages: %.1f Avg nodes: %.1f Avg siblings: %.1f Avg size: %s", + total.page / #files, + total.node / #files, + total.sibling / #files, + format_size(total.size // #files) + ) + ) + print(string.format("Max pages: %d (%s)", max.page, max_path.page)) + print(string.format("Max nodes: %d (%s)", max.node, max_path.node)) + print(string.format("Max siblings: %d (%s)", max.sibling, max_path.sibling)) + print(string.format("Max size: %s (%s)", format_size(max.size), max_path.size)) +end + +function commands.pages(args) + local path = assert(args[1], "usage: step-log-util.lua pages ") + local info = read_full(path) + print(string.format("%-8s %-18s %s", "index", "address", "region")) + print(string.rep("-", 50)) + for _, idx in 
ipairs(info.page_indices) do + local addr = idx << PAGE_LOG2_SIZE + print(string.format("%-8d 0x%016x %s", idx, addr, classify_address(addr))) + end +end + +function commands.nodes(args) + local path = assert(args[1], "usage: step-log-util.lua nodes ") + local info = read_full(path) + if info.node_count == 0 then + print("No nodes recorded in " .. path) + return + end + print(string.format("%-18s %-9s %-64s %-64s %s", "address", "log2_size", "hash_before", "hash_after", "region")) + print(string.rep("-", 170)) + for _, n in ipairs(info.nodes) do + print( + string.format( + "0x%016x %-9d %s %s %s", + n.address, + n.log2_size, + hexstring(n.hash_before), + hexstring(n.hash_after), + classify_address(n.address) + ) + ) + end +end + +local function help() + io.stderr:write(string.format( + [=[ +Usage: + + %s [args...] + +Commands: + + info Human-readable header summary + signature Verify the step log signature (exit non-zero on mismatch) + root-hash-before Print root hash before (hex, no newline) + requested-cycle-count + Print requested_cycle_count from header (decimal, no newline) + root-hash-after Print root hash after (hex, no newline) + stats [...] Full structural breakdown (sizes, regions) + batch-stats Aggregate stats table for all .log files in + pages List page indices, addresses, regions + nodes List logged subtree-write nodes + +Region classification needs the cartesi shared library; if not available, +the region column degrades to "unknown". + +]=], + arg[0] + )) + os.exit() +end + +local options = { + { + "^%-h$", + function(all) + if not all then return false end + help() + end, + }, + { + "^%-%-help$", + function(all) + if not all then return false end + help() + end, + }, + { + ".*", + function(all) error("unrecognized option " .. 
all) end, + }, +} + +local values = {} +for _, argument in ipairs(arg) do + if argument:sub(1, 1) == "-" then + for _, option in ipairs(options) do + if option[2](argument:match(option[1])) then break end + end + else + values[#values + 1] = argument + end +end + +if not values[1] then help() end +local cmd_name = values[1] +assert(commands[cmd_name], "unknown command '" .. cmd_name .. "', use --help for usage") + +local cmd_args = {} +for i = 2, #values do + cmd_args[i - 1] = values[i] +end + +commands[cmd_name](cmd_args) diff --git a/src/step-log.hpp b/src/step-log.hpp new file mode 100644 index 000000000..8a7cee466 --- /dev/null +++ b/src/step-log.hpp @@ -0,0 +1,394 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef STEP_LOG_HPP +#define STEP_LOG_HPP + +/// \file +/// \brief Parsed binary step log: the witnessed tree, the queries every replayer runs over it, and +/// the root-hash recompute. +/// +/// Heap-free so it builds in the risc0 guest; the host and zkVM hash backends are selected by +/// ZKARCHITECTURE. 
+ +#include +#include +#include +#include +#include +#include + +#include "address-range-constants.hpp" +#include "hash-tree-constants.hpp" +#include "i-hasher.hpp" +#include "machine-hash.hpp" +#include "step-log-layout.hpp" +#include "throw.hpp" +#include "uint128.hpp" +#include "variant-hasher.hpp" + +namespace cartesi { + +using hash_type = unsigned char (*)[MACHINE_HASH_SIZE]; +using const_hash_type = const unsigned char (*)[MACHINE_HASH_SIZE]; + +#ifdef ZKARCHITECTURE + +extern "C" void zk_merkle_tree_hash(hash_function_type hash_function, const unsigned char *data, size_t size, + hash_type hash); + +extern "C" void zk_concat_hash(hash_function_type hash_function, const_hash_type left, const_hash_type right, + hash_type result); + +inline void merkle_tree_hash(hash_function_type hash_function, const unsigned char *data, size_t size, hash_type hash) { + zk_merkle_tree_hash(hash_function, data, size, hash); +} + +inline void concat_hash(hash_function_type hash_function, const_hash_type left, const_hash_type right, + hash_type result) { + zk_concat_hash(hash_function, left, right, result); +} + +#else + +inline void merkle_tree_hash(hash_function_type hash_function, const unsigned char *data, size_t size, hash_type hash) { + variant_hasher h{hash_function}; + get_merkle_tree_hash(h, std::span{data, size}, HASH_TREE_WORD_SIZE, + machine_hash_view{*hash, MACHINE_HASH_SIZE}); +} + +inline void concat_hash(hash_function_type hash_function, const_hash_type left, const_hash_type right, + hash_type result) { + variant_hasher h{hash_function}; + // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) + get_concat_hash(h, *reinterpret_cast(left), *reinterpret_cast(right), + *reinterpret_cast(result)); + // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) +} + +#endif + +// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast,misc-no-recursion) + +/// \brief Parsed binary step log: the witnessed tree plus the header values it was decoded from. 
+/// \details Non-owning -- pages/nodes/siblings are spans into the caller's log image. \p pages is +/// mutable so the root recompute can rehash each page into its scratch slot. The queries and the +/// recompute are caller-invariant: every replayer finds a page, finds a node, and reconstructs the +/// root identically. +struct step_log { + machine_hash root_hash_before{}; ///< Root hash before the step (from log header) + uint64_t requested_cycle_count{0}; ///< Caller-requested cycle count (from log header; see step_log_header) + machine_hash root_hash_after{}; ///< Root hash after the step (from log header) + hash_function_type hash_function{hash_function_type::keccak256}; ///< Hash function used for the step log + std::span pages; ///< Witnessed pages, rehashed into their scratch slots + std::span nodes; ///< Subtree-write nodes + std::span siblings; ///< Sibling hashes for untouched subtrees + uint64_t consumed_node_count{0}; ///< Nodes a semantic write consumed during replay; see compute_root_hash + + /// \brief Decode and validate a binary step log. + /// \param log_image Pointer to the step log file bytes. The returned step_log keeps spans into it, + /// so it must outlive the step_log. + /// \param log_size Size of the log bytes. + /// \return A validated step_log whose witnessed tree reconstructs root_hash_before. + /// \throw runtime_error if the log is malformed or the initial root hash does not match. + /// \details Mirrors StepLog.decode in the Solidity replayer: header parse, per-count size bounds, + /// page ordering, per-node alignment/range, the combined pages+nodes disjointness walk, and the + /// initial-root recompute. 
+ static step_log decode(unsigned char *log_image, uint64_t log_size) { + step_log log; + // Parse header with a stack-local copy to avoid alignment UB on the log buffer + if (log_size < sizeof(step_log_header)) { + THROW(std::runtime_error, "step log shorter than header"); + } + step_log_header header{}; + std::memcpy(&header, log_image, sizeof(header)); + if (header.signature != STEP_LOG_SIGNATURE) { + THROW(std::runtime_error, "invalid step log signature"); + } + switch (header.hash_function) { + case static_cast(hash_function_type::keccak256): + log.hash_function = hash_function_type::keccak256; + break; + case static_cast(hash_function_type::sha256): + log.hash_function = hash_function_type::sha256; + break; + default: + THROW(std::runtime_error, "invalid log format: unsupported hash function type"); + } + // Bound each count against remaining log bytes. Division avoids overflow + // (remaining stays within log_size; each *_count * sizeof is then safe). + uint64_t remaining = log_size - sizeof(step_log_header); + if (header.page_count > remaining / sizeof(page_entry)) { + THROW(std::runtime_error, "page count exceeds step log size"); + } + const uint64_t pages_bytes = header.page_count * sizeof(page_entry); + remaining -= pages_bytes; + if (header.node_count > remaining / sizeof(node_entry)) { + THROW(std::runtime_error, "node count exceeds step log size"); + } + const uint64_t nodes_bytes = header.node_count * sizeof(node_entry); + remaining -= nodes_bytes; + if (remaining % sizeof(machine_hash) != 0 || header.sibling_count != remaining / sizeof(machine_hash)) { + THROW(std::runtime_error, "sibling count does not match step log size"); + } + log.root_hash_before = header.root_hash_before; + log.requested_cycle_count = header.requested_cycle_count; + log.root_hash_after = header.root_hash_after; + if (header.page_count == 0) { + THROW(std::runtime_error, "page count is zero"); + } + log.pages = std::span{reinterpret_cast(log_image + sizeof(step_log_header)), + 
static_cast(header.page_count)}; + log.nodes = std::span{ + reinterpret_cast(log_image + sizeof(step_log_header) + pages_bytes), + static_cast(header.node_count)}; + log.siblings = std::span{ + reinterpret_cast(log_image + sizeof(step_log_header) + pages_bytes + nodes_bytes), + static_cast(header.sibling_count)}; + + validate_pages_ordered(log.pages); + validate_nodes_aligned(log.nodes); + validate_entries_ordered_and_disjoint(log.pages, log.nodes); + // Pre-state integrity: the recomputed root must match the header's claim. + if (log.compute_root_hash(false) != log.root_hash_before) { + THROW(std::runtime_error, "initial root hash mismatch"); + } + return log; + } + + /// \brief Try to find a witnessed page by its physical address. + /// \param paddr_page Page-aligned physical address. + /// \return Pointer to the page entry if witnessed, nullptr otherwise. + page_entry *try_find_page(uint64_t paddr_page) const { + const auto page_index = paddr_page >> AR_LOG2_PAGE_SIZE; + auto it = std::ranges::lower_bound(pages, page_index, std::ranges::less{}, + [](const auto &page) { return page.index; }); + if (it != pages.end() && it->index == page_index) { + return &(*it); + } + return nullptr; + } + + /// \brief Find a witnessed page by its physical address, or throw if absent. + page_entry *find_page(uint64_t paddr_page) const { + auto *page_log = try_find_page(paddr_page); + if (page_log == nullptr) { + THROW(std::runtime_error, "required page not found"); + } + return page_log; + } + + /// \brief Try to find a subtree-write node by its start address. + /// \param address Subtree start address. + /// \return Pointer to the node entry if present, nullptr otherwise. 
+ const node_entry *try_find_node(uint64_t address) const { + auto it = + std::ranges::lower_bound(nodes, address, std::ranges::less{}, [](const auto &n) { return n.address; }); + if (it != nodes.end() && it->address == address) { + return &(*it); + } + return nullptr; + } + + /// \brief Recompute the machine root hash from the witnessed tree. + /// \param use_after When false, use each node's hash_before (reconstructs root_hash_before). When + /// true, use each node's hash_after (reconstructs root_hash_after). + /// \details Hashes each page lazily into its scratch slot, then walks the tree with three cursors + /// (pages, nodes, siblings) to produce the root hash. A zero scratch slot means "needs hashing": + /// pages arrive zero on the wire (validate_pages_ordered enforces it), and every replay accessor + /// re-zeros a page's slot when it writes the page. So the before-replay call hashes all pages and + /// the after-replay call rehashes only the pages the operation actually wrote -- clean pages keep + /// the hash the before pass validated against root_hash_before, which is byte-identical post-step. + /// Nodes pick between their two precomputed hashes based on \p use_after. 
+ machine_hash compute_root_hash(bool use_after) const { + static const machine_hash all_zeros{}; + for (auto &page : pages) { + if (page.hash == all_zeros) { + merkle_tree_hash(hash_function, page.data, AR_PAGE_SIZE, reinterpret_cast(&page.hash)); + } + } + size_t next_page = 0; + size_t next_node = 0; + size_t next_sibling = 0; + auto root_hash = compute_subtree(0, HASH_TREE_LOG2_ROOT_SIZE - AR_LOG2_PAGE_SIZE, next_page, next_node, + next_sibling, use_after); + if (next_page != pages.size()) { + THROW(std::runtime_error, "too many pages in log"); + } + if (next_node != nodes.size()) { + THROW(std::runtime_error, "too many nodes in log"); + } + if (next_sibling != siblings.size()) { + THROW(std::runtime_error, "too many sibling hashes in log"); + } + if (use_after) { + check_all_nodes_consumed(); + } + return root_hash; + } + + /// \brief Assert every witnessed node was consumed by a semantic write during replay. + /// \details Post-state soundness: a node's hash_after is folded into root_hash_after verbatim, so + /// every node must be produced by a semantic write (cmio supra-page or uarch reset); an unconsumed + /// node would inject an arbitrary post-state subtree. compute_root_hash(true) calls this. A reverted + /// operation substitutes a recorded root instead of recomputing it, so it must call this explicitly + /// to keep the same guarantee. + void check_all_nodes_consumed() const { + if (consumed_node_count != nodes.size()) { + THROW(std::runtime_error, "unconsumed node in step log"); + } + } + +private: + /// \brief Validate that witnessed pages are strictly ascending by index, with a zero scratch slot. + /// \param pages Witnessed pages, in wire order. + /// \throw runtime_error if a page index is not strictly increasing or a scratch hash is non-zero. 
+ static void validate_pages_ordered(std::span pages) { + static const machine_hash all_zeros{}; + for (size_t i = 0; i < pages.size(); i++) { + if (i > 0 && pages[i - 1].index >= pages[i].index) { + THROW(std::runtime_error, "invalid log format: page index is not in increasing order"); + } + // find_page binary-searches by page.data address, so data pointers must increase monotonically. + // Unreachable while all pages share one contiguous buffer (data order then follows the index order + // above), but it fail-closes should pages ever be allocated independently. + // LCOV_EXCL_START + if (i > 0 && +pages[i - 1].data >= +pages[i].data) { + THROW(std::runtime_error, "invalid log format: page data is not in increasing order"); + } + // LCOV_EXCL_STOP + if (pages[i].hash != all_zeros) { + THROW(std::runtime_error, "invalid log format: page scratch hash area is not zero"); + } + } + } + + /// \brief Validate each node's size range and address alignment. + /// \param nodes Subtree-write nodes, in wire order. + /// \throw runtime_error if a node's log2 size is out of range or its address is not aligned to its size. + /// \details Runs before the disjointness walk, which relies on log2_size <= HASH_TREE_LOG2_ROOT_SIZE + /// to keep its 1 << log2_size shift well defined. + static void validate_nodes_aligned(std::span nodes) { + for (const auto &n : nodes) { + if (n.log2_size <= HASH_TREE_LOG2_PAGE_SIZE || n.log2_size > HASH_TREE_LOG2_ROOT_SIZE) { + THROW(std::runtime_error, "invalid log format: node log2 size out of range"); + } + // A node at HASH_TREE_LOG2_ROOT_SIZE must have address 0; UINT64_C(1) << + // HASH_TREE_LOG2_ROOT_SIZE would be undefined. Mirrors + // StepLog.validateEntriesOrderedAndDisjoint's alignment special case. 
+ if (n.log2_size == HASH_TREE_LOG2_ROOT_SIZE) { + if (n.address != 0) { + THROW(std::runtime_error, "invalid log format: node address not aligned to its size"); + } + } else { + const auto node_size = UINT64_C(1) << n.log2_size; + if ((n.address & (node_size - 1)) != 0) { + THROW(std::runtime_error, "invalid log format: node address not aligned to its size"); + } + } + } + } + + /// \brief Validate that the combined pages+nodes stream is strictly ascending and disjoint. + /// \param pages Witnessed pages, ascending by index. + /// \param nodes Subtree-write nodes, with validated sizes (see validate_nodes_aligned). + /// \throw runtime_error if any entry starts before the previous entry's end. + /// \details Same algorithm as StepLog.validateEntriesOrderedAndDisjoint in the Solidity replayer. + /// 128-bit arithmetic so an entry ending at 2^64 cannot overflow. + static void validate_entries_ordered_and_disjoint(std::span pages, + std::span nodes) { + size_t pi = 0; // page index cursor + size_t ni = 0; // node index cursor + // end of the previous entry (page or node), for overlap checking + uint128_t prev_end = 0; + while (pi < pages.size() || ni < nodes.size()) { + uint128_t entry_start{}; // page or node used in this iteration + uint128_t entry_end{}; // page or node used in this iteration + bool take_page = false; // take next entry from pages or nodes + if (pi >= pages.size()) { + take_page = false; + } else if (ni >= nodes.size()) { + take_page = true; + } else { + const uint128_t page_start = static_cast(pages[pi].index) << AR_LOG2_PAGE_SIZE; + take_page = page_start < nodes[ni].address; + } + if (take_page) { + entry_start = static_cast(pages[pi].index) << AR_LOG2_PAGE_SIZE; + entry_end = entry_start + (static_cast(1) << AR_LOG2_PAGE_SIZE); + ++pi; + } else { + entry_start = nodes[ni].address; + entry_end = entry_start + (static_cast(1) << nodes[ni].log2_size); + ++ni; + } + if (entry_start < prev_end) { + THROW(std::runtime_error, "invalid log format: page 
or node overlaps a previous entry"); + } + prev_end = entry_end; + } + } + + /// \brief Recursively reconstruct the root hash of the subtree rooted at page_index. + /// \param page_index Index of the first page in the subtree. + /// \param log2_page_count Log2 of the number of pages in the subtree. + /// \param next_page Cursor into the pages array; advances past each page consumed. + /// \param next_node Cursor into the nodes array; advances past each node consumed. + /// \param next_sibling Cursor into the sibling hashes; advances past each sibling consumed. + /// \param use_after Selects which of a node's two stored hashes to use (see compute_root_hash). + /// \return Root hash of the subtree at (page_index, log2_page_count). + machine_hash compute_subtree(uint64_t page_index, int log2_page_count, size_t &next_page, size_t &next_node, + size_t &next_sibling, bool use_after) const { + const auto subtree_start_addr = page_index << AR_LOG2_PAGE_SIZE; + const auto subtree_log2_size = log2_page_count + AR_LOG2_PAGE_SIZE; + const auto page_count = UINT64_C(1) << log2_page_count; + const auto subtree_end_page_index = page_index + page_count; + // next unconsumed page / node is inside this subtree? + const bool page_in = next_page < pages.size() && pages[next_page].index < subtree_end_page_index; + // shift node address into page-index units to compare with subtree_end_page_index + const bool node_in = + next_node < nodes.size() && (nodes[next_node].address >> AR_LOG2_PAGE_SIZE) < subtree_end_page_index; + if (!page_in && !node_in) { + if (next_sibling >= siblings.size()) { + THROW(std::runtime_error, "too few sibling hashes in log"); + } + return siblings[next_sibling++]; + } + if (node_in && nodes[next_node].address == subtree_start_addr && + nodes[next_node].log2_size == static_cast(subtree_log2_size)) { + const auto &n = nodes[next_node++]; + return use_after ? 
n.hash_after : n.hash_before; + } + if (log2_page_count > 0) { + auto left = compute_subtree(page_index, log2_page_count - 1, next_page, next_node, next_sibling, use_after); + const auto halfway_page_index = page_index + (page_count >> 1); + auto right = + compute_subtree(halfway_page_index, log2_page_count - 1, next_page, next_node, next_sibling, use_after); + machine_hash hash{}; + concat_hash(hash_function, reinterpret_cast(&left), reinterpret_cast(&right), + reinterpret_cast(&hash)); + return hash; + } + // Leaf: must be a page (nodes have log2_size > page, so can't fit in a single-page subtree) + return pages[next_page++].hash; + } +}; + +// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast,misc-no-recursion) + +} // namespace cartesi + +#endif diff --git a/src/step-pretty-printer.hpp b/src/step-pretty-printer.hpp new file mode 100644 index 000000000..1cf34a9ae --- /dev/null +++ b/src/step-pretty-printer.hpp @@ -0,0 +1,75 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef STEP_PRETTY_PRINTER_HPP +#define STEP_PRETTY_PRINTER_HPP + +/// \file +/// \brief Human-readable printout of a replayed step. 
+ +#include +#include +#include +#include + +namespace cartesi { + +/// \brief Sink that formats a step replay as an indented, human-readable printout. +/// \details Each instruction is bracketed by its mnemonic, with its reads and writes nested +/// underneath. Host-only (it allocates): attach one to a replay context, run the replay, then +/// retrieve the text with str(); the caller decides where to write it. General-purpose, not +/// specific to the microarchitecture. +class step_pretty_printer { + std::ostringstream m_out; + int m_indent{0}; ///< Current bracket nesting depth + uint64_t m_access{0}; ///< 1-based access counter across the whole printout + + std::ostream &line() { + return m_out << std::string(static_cast(m_indent) * 2, ' '); + } + +public: + /// \brief Returns the accumulated printout. + std::string str() const { + return m_out.str(); + } + + void begin_bracket(const char *text) { + line() << "begin " << text << '\n'; + ++m_indent; + } + + void end_bracket(const char *text) { + --m_indent; + line() << "end " << text << '\n'; + } + + /// \brief Emit a read. \p name is the register/field name, or nullptr for plain memory. + void read(const char *name, uint64_t paddr, uint64_t val) { + line() << std::dec << ++m_access << ": read " << (name != nullptr ? name : "") << "@0x" << std::hex << paddr + << ": 0x" << val << '\n'; + } + + /// \brief Emit a write, showing the value before and after. \p name as in read(). + void write(const char *name, uint64_t paddr, uint64_t old_val, uint64_t new_val) { + line() << std::dec << ++m_access << ": write " << (name != nullptr ? 
name : "") << "@0x" << std::hex << paddr + << ": 0x" << old_val << " -> 0x" << new_val << '\n'; + } +}; + +} // namespace cartesi + +#endif diff --git a/src/test-collect-hashes.lua b/src/test-collect-hashes.lua index 6dadb0bd8..32fd7909a 100755 --- a/src/test-collect-hashes.lua +++ b/src/test-collect-hashes.lua @@ -116,7 +116,7 @@ local function get_root_hashes_directly(input_count) if hashes then tinsert(inputs, hashes) end hashes = {} local input = encode_input(index) - m:send_cmio_response(cartesi.CMIO_YIELD_REASON_ADVANCE_STATE, input) + m:send_cmio_response(m:get_root_hash(), cartesi.HTIF_YIELD_REASON_ADVANCE_STATE, input) index = index + 1 if index >= input_count then break end end @@ -154,7 +154,7 @@ local function get_root_hashes_with_collect(input_count) hashes = {} mcycle_phase = 0 local input = encode_input(index) - m:send_cmio_response(cartesi.CMIO_YIELD_REASON_ADVANCE_STATE, input) + m:send_cmio_response(m:get_root_hash(), cartesi.HTIF_YIELD_REASON_ADVANCE_STATE, input) index = index + 1 if index >= input_count then break end end diff --git a/src/translate-virtual-address.hpp b/src/translate-virtual-address.hpp index 79c34dd93..b34a2b41c 100644 --- a/src/translate-virtual-address.hpp +++ b/src/translate-virtual-address.hpp @@ -47,6 +47,7 @@ #include "compiler-defines.hpp" #include "find-pma.hpp" +#include "i-accept-dirty-pages.hpp" #include "riscv-constants.hpp" namespace cartesi { @@ -68,7 +69,10 @@ static inline bool write_ram_uint64(STATE_ACCESS a, uint64_t paddr, uint64_t val // log writes to memory a.write_memory_word(faddr, pma_index, val); // mark page as dirty so we know to update the hash tree - a.mark_dirty_page(faddr, pma_index); + // only state accesses with a deferred store need this, see i_accept_dirty_pages + if constexpr (is_an_i_accept_dirty_pages_v) { + a.mark_dirty_page(paddr, pma_index); + } return true; } diff --git a/src/uarch-constants.hpp b/src/uarch-constants.hpp index d80b7dfdd..2a56a705f 100644 --- a/src/uarch-constants.hpp 
+++ b/src/uarch-constants.hpp @@ -22,6 +22,7 @@ #include "address-range-constants.hpp" #include "address-range-defines.h" #include "cm.h" +#include "machine-reg.hpp" #include "uarch-defines.h" namespace cartesi { @@ -66,12 +67,17 @@ static_assert((UARCH_SHADOW_LENGTH & (AR_PAGE_SIZE - 1)) == 0, "UARCH_SHADOW_LEN static_assert((UARCH_RAM_LENGTH & (AR_PAGE_SIZE - 1)) == 0, "UARCH_RAM_LENGTH must be multiple of AR_PAGE_SIZE"); static_assert(UARCH_CYCLE_MAX == CM_UARCH_CYCLE_MAX, "CM_UARCH_CYCLE_MAX must be equal to UARCH_CYCLE_MAX"); +/// \brief Machine state addresses referenced by uarch code that is translated to Solidity +enum uarch_machine_state_addresses : uint64_t { + IFLAGS_Y_ADDRESS = machine_reg_address(machine_reg::iflags_Y), ///< Address of the iflags.Y register + HTIF_TOHOST_ADDRESS = machine_reg_address(machine_reg::htif_tohost), ///< Address of the htif.tohost register +}; + /// \brief ecall function codes enum uarch_ecall_functions : uint64_t { - UARCH_ECALL_FN_HALT = EXPAND_UINT64_C(UARCH_ECALL_FN_HALT_DEF), ///< halt uarch execution - UARCH_ECALL_FN_PUTCHAR = EXPAND_UINT64_C(UARCH_ECALL_FN_PUTCHAR_DEF), ///< putchar - UARCH_ECALL_FN_MARK_DIRTY_PAGE = EXPAND_UINT64_C(UARCH_ECALL_FN_MARK_DIRTY_PAGE_DEF), ///< mark_dirty_page - UARCH_ECALL_FN_WRITE_TLB = EXPAND_UINT64_C(UARCH_ECALL_FN_WRITE_TLB_DEF), ///< write_tlb + UARCH_ECALL_FN_HALT = EXPAND_UINT64_C(UARCH_ECALL_FN_HALT_DEF), ///< halt uarch execution + UARCH_ECALL_FN_PUTCHAR = EXPAND_UINT64_C(UARCH_ECALL_FN_PUTCHAR_DEF), ///< putchar + UARCH_ECALL_FN_WRITE_TLB = EXPAND_UINT64_C(UARCH_ECALL_FN_WRITE_TLB_DEF), ///< write_tlb }; } // namespace cartesi diff --git a/src/uarch-defines.h b/src/uarch-defines.h index 8dc022ed8..f36fa30c8 100644 --- a/src/uarch-defines.h +++ b/src/uarch-defines.h @@ -29,10 +29,10 @@ #define UARCH_LOG2_CYCLE_MAX_DEF 20 // microarchitecture ecall function codes -#define UARCH_ECALL_FN_HALT_DEF 1 // halt uarch -#define UARCH_ECALL_FN_PUTCHAR_DEF 2 // putchar -#define 
UARCH_ECALL_FN_MARK_DIRTY_PAGE_DEF 3 // mark_dirty_page -#define UARCH_ECALL_FN_WRITE_TLB_DEF 4 // write_tlb +// function code 3 was mark_dirty_page, now removed, and the gap is intentional +#define UARCH_ECALL_FN_HALT_DEF 1 // halt uarch +#define UARCH_ECALL_FN_PUTCHAR_DEF 2 // putchar +#define UARCH_ECALL_FN_WRITE_TLB_DEF 4 // write_tlb // NOLINTEND(cppcoreguidelines-macro-usage,cppcoreguidelines-macro-to-enum,modernize-macro-to-enum) #endif /* end of include guard: UARCH_DEFINES_H */ diff --git a/src/uarch-interpret.cpp b/src/uarch-interpret.cpp index c4ba2f6ae..798661dda 100644 --- a/src/uarch-interpret.cpp +++ b/src/uarch-interpret.cpp @@ -19,6 +19,9 @@ #include #include "collect-uarch-cycle-hashes-state-access.hpp" // IWYU pragma: keep +#include "step-pretty-printer.hpp" // IWYU pragma: keep +#include "uarch-record-step-state-access.hpp" // IWYU pragma: keep +#include "uarch-replay-step-state-access.hpp" // IWYU pragma: keep #include "uarch-state-access.hpp" // IWYU pragma: keep #include "uarch-step.hpp" @@ -52,4 +55,14 @@ template uarch_interpreter_break_reason uarch_interpret(const uarch_state_access template uarch_interpreter_break_reason uarch_interpret(const collect_uarch_cycle_hashes_state_access a, uint64_t uarch_cycle_end); +// Explicit instantiation for uarch_record_step_state_access +template uarch_interpreter_break_reason uarch_interpret(const uarch_record_step_state_access a, + uint64_t uarch_cycle_end); + +// Explicit instantiation for uarch_replay_step_state_access (replay/verify and the host printout) +template uarch_interpreter_break_reason uarch_interpret(const uarch_replay_step_state_access a, + uint64_t uarch_cycle_end); +template uarch_interpreter_break_reason uarch_interpret(const uarch_replay_step_state_access a, + uint64_t uarch_cycle_end); + } // namespace cartesi diff --git a/src/uarch-record-state-access.hpp b/src/uarch-record-state-access.hpp deleted file mode 100644 index 0f089d854..000000000 --- a/src/uarch-record-state-access.hpp 
+++ /dev/null @@ -1,297 +0,0 @@ -// Copyright Cartesi and individual authors (see AUTHORS) -// SPDX-License-Identifier: LGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation, either version 3 of the License, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License along -// with this program (see COPYING). If not, see . -// - -#ifndef UARCH_RECORD_STATE_ACCESS_HPP -#define UARCH_RECORD_STATE_ACCESS_HPP - -/// \file -/// \brief State access implementation that record and logs all accesses -#include -#include -#include -#include - -#include "access-log.hpp" -#include "hash-tree-constants.hpp" -#include "i-accept-scoped-notes.hpp" -#include "i-prefer-shadow-uarch-state.hpp" -#include "i-uarch-state-access.hpp" -#include "machine-hash.hpp" -#include "machine-reg.hpp" -#include "machine.hpp" -#include "meta.hpp" -#include "scoped-note.hpp" -#include "shadow-tlb.hpp" -#include "shadow-uarch-state.hpp" -#include "uarch-constants.hpp" - -namespace cartesi { - -using namespace std::string_literals; - -/// \details The uarch_record_state_access logs all access to the machine state. -class uarch_record_state_access : - public i_uarch_state_access, - public i_accept_scoped_notes, - public i_prefer_shadow_uarch_state { - - // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) - machine &m_m; ///< Macro machine - access_log &m_log; ///< Access log - // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) - -public: - /// \brief Constructor from machine and uarch states. 
- /// \param m Reference to machine state. - /// \param log Reference to log. - uarch_record_state_access(machine &m, access_log &log) : m_m(m), m_log(log) { - ; - } - -private: - static std::pair adjust_access(uint64_t paddr, int log2_size) { - static_assert(cartesi::log2_size_v <= HASH_TREE_LOG2_WORD_SIZE, - "Hash tree word size must not be smaller than machine word size"); - if (((paddr >> log2_size) << log2_size) != paddr) { - throw std::invalid_argument{"misaligned access"}; - } - const auto log2_word_size = HASH_TREE_LOG2_WORD_SIZE; - const auto log2_access_size = std::max(log2_size, log2_word_size); - const auto access_paddr = (paddr >> log2_access_size) << log2_access_size; - return {access_paddr, log2_access_size}; - } - - void log_access(access &&a, const char *text) const { - m_log.push_access(std::move(a), text); - } - - static void log_access_type(access &a, access_type type) { - a.set_type(type); - } - - static void log_access_range(access &a, uint64_t paddr, int log2_size) { - a.set_address(paddr); - a.set_log2_size(log2_size); - } - - void log_access_siblings_and_read_hash(access &a, uint64_t paddr, int log2_size) const { - // Since the tree was updated before we started collecting the log, we only update after writes - const auto proof = m_m.get_proof(skip_hash_tree_update, paddr, log2_size); - // The only pieces of data we use from the proof are the target hash and the siblings - a.set_sibling_hashes(proof.get_sibling_hashes()); - a.set_read_hash(proof.get_target_hash()); - } - - static void log_written_hash(access &a, const machine_hash &written_hash) { - a.get_written_hash().emplace(written_hash); - } - - const auto &log_read_data(access &a, uint64_t paddr, int log2_size) const { - // NOLINTBEGIN(bugprone-unchecked-optional-access) - const auto size = UINT64_C(1) << log2_size; - a.get_read().emplace(); - a.get_read().value().resize(size); - m_m.read_memory(paddr, a.get_read().value().data(), size); - return a.get_read().value(); - // 
NOLINTEND(bugprone-unchecked-optional-access) - } - - void log_read_data_if_requested(access &a, uint64_t paddr, int log2_size) const { - if (m_log.get_log_type().has_large_data()) { - std::ignore = log_read_data(a, paddr, log2_size); - } - } - - void log_written_data(access &a, uint64_t paddr, int log2_size) const { - // NOLINTBEGIN(bugprone-unchecked-optional-access) - const auto size = UINT64_C(1) << log2_size; - a.get_written().emplace(); - a.get_written().value().resize(size); - m_m.read_memory(paddr, a.get_written().value().data(), size); - // NOLINTEND(bugprone-unchecked-optional-access) - } - - void log_written_data_if_requested(access &a, uint64_t paddr, int log2_size) const { - if (m_log.get_log_type().has_large_data()) { - log_written_data(a, paddr, log2_size); - } - } - - uint64_t log_read_word_access(uint64_t paddr, const char *text) const { - const auto log2_size = log2_size_v; - access a; - log_access_type(a, access_type::read); - log_access_range(a, paddr, log2_size); - const auto [access_paddr, access_log2_size] = adjust_access(paddr, log2_size); - log_access_siblings_and_read_hash(a, access_paddr, access_log2_size); - const auto &read_data = log_read_data(a, access_paddr, access_log2_size); - const auto val_offset = paddr - access_paddr; - const auto val = get_word_access_data(read_data, val_offset); - log_access(std::move(a), text); - return val; - } - - uint64_t log_read_reg_access(machine_reg reg) const { - return log_read_word_access(machine_reg_address(reg), machine_reg_get_name(reg)); - } - - template - void log_write_access(uint64_t paddr, int log2_size, WRITE_UPDATE_F write_and_update, const char *text) const { - access a; - log_access_type(a, access_type::write); - log_access_range(a, paddr, log2_size); - const auto [access_paddr, access_log2_size] = adjust_access(paddr, log2_size); - log_access_siblings_and_read_hash(a, access_paddr, access_log2_size); - // We *need* the read data for small writes, because we splice the written into it - 
if (log2_size < HASH_TREE_LOG2_WORD_SIZE) { - std::ignore = log_read_data(a, access_paddr, access_log2_size); - } else { - log_read_data_if_requested(a, access_paddr, access_log2_size); - } - // Call functor to perform the write and update the tree - write_and_update(); - // The functor updated the tree, so we don't do it again - log_written_hash(a, m_m.get_node_hash(access_paddr, access_log2_size, skip_hash_tree_update)); - // We don't *need* the written for small writes, but it is convenient to always have it (for debugging purposes) - if (log2_size < HASH_TREE_LOG2_WORD_SIZE) { - log_written_data(a, access_paddr, access_log2_size); - } else { - log_written_data_if_requested(a, access_paddr, access_log2_size); - } - log_access(std::move(a), text); - } - - void log_write_reg_access(machine_reg reg, uint64_t val) const { - log_write_access( - machine_reg_address(reg), log2_size_v, - [this, reg, val]() { - m_m.write_reg(reg, val); - if (!m_m.update_hash_tree_page(machine_reg_address(reg))) { - throw std::invalid_argument{"error updating hash tree"}; - }; - }, - machine_reg_get_name(reg)); - } - - // ----- - // i_prefer_shadow_uarch_state interface implementation - // ----- - friend i_prefer_shadow_uarch_state; - - uint64_t do_read_shadow_uarch_state(shadow_uarch_state_what what) const { - return log_read_reg_access(machine_reg_enum(what)); - } - - void do_write_shadow_uarch_state(shadow_uarch_state_what what, uint64_t val) const { - log_write_reg_access(machine_reg_enum(what), val); - } - - // ----- - // i_uarch_state_access interface implementation - // ----- - friend i_uarch_state_access; - - uint64_t do_read_word(uint64_t paddr) const { - return log_read_word_access(paddr, machine::get_what_name(paddr)); - } - - void do_write_word(uint64_t paddr, uint64_t val) const { - log_write_access( - paddr, log2_size_v, - [this, paddr, val]() { - m_m.write_word(paddr, val); - if (!m_m.update_hash_tree_page(paddr)) { - throw std::invalid_argument{"error updating hash tree"}; 
- }; - }, - machine::get_what_name(paddr)); - } - - void do_write_tlb(TLB_set_index set_index, uint64_t slot_index, uint64_t vaddr_page, uint64_t vp_offset, - uint64_t pma_index) const { - const auto slot_paddr = shadow_tlb_get_abs_addr(set_index, slot_index); - log_write_access( - slot_paddr, SHADOW_TLB_SLOT_LOG2_SIZE, - [this, set_index, slot_index, vaddr_page, vp_offset, pma_index]() { - m_m.write_unverified_tlb(set_index, slot_index, vaddr_page, vp_offset, pma_index); - // Entire slot is in a single page - if (!m_m.update_hash_tree_page(shadow_tlb_get_abs_addr(set_index, slot_index))) { - throw std::invalid_argument{"error updating hash tree"}; - }; - }, - "tlb.slot"); - // Writes to TLB slots have to be atomic. - // We can only do atomic writes of entire hash tree nodes. - // Therefore, TLB slot must have a power-of-two size, or at least be aligned to it. - static_assert(SHADOW_TLB_SLOT_SIZE == sizeof(shadow_tlb_slot), "shadow TLB slot size is wrong"); - static_assert((UINT64_C(1) << SHADOW_TLB_SLOT_LOG2_SIZE) == SHADOW_TLB_SLOT_SIZE, - "shadow TLB slot log2 size is wrong"); - static_assert(SHADOW_TLB_SLOT_LOG2_SIZE >= HASH_TREE_LOG2_WORD_SIZE, - "shadow TLB slot must fill at least an entire hash tree word"); - } - - void do_reset_uarch() const { - //??D I'd like to add an static_assert or some other guard mechanism to - // guarantee that uarch.ram and uarch.shadow are alone in the entire - // span of their common hash tree parent node - log_write_access( - UARCH_STATE_START_ADDRESS, UARCH_STATE_LOG2_SIZE, - [this]() { - m_m.reset_uarch(); - // reset_uarch() marks all modified pages as dirty - if (!m_m.update_hash_tree()) { - throw std::invalid_argument{"error updating hash tree"}; - } - }, - "uarch.state"); - } - - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool do_putchar(uint8_t /*c*/) const { - return false; - } - - void do_mark_dirty_page(uint64_t paddr, uint64_t pma_index) const { - // Forward to machine and no need to log - 
m_m.mark_dirty_page(paddr, pma_index); - } - - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - constexpr const char *do_get_name() const { - return "uarch_record_state_access"; - } - - // ----- - // i_accept_scoped_notes interface implementation - // ----- - friend i_accept_scoped_notes; - - void do_push_begin_bracket(const char *text) const { - m_log.push_begin_bracket(text); - } - - void do_push_end_bracket(const char *text) const { - m_log.push_end_bracket(text); - } - - auto do_make_scoped_note(const char *text) const { - return scoped_note{*this, text}; - } -}; - -} // namespace cartesi - -#endif diff --git a/src/uarch-record-step-state-access.hpp b/src/uarch-record-step-state-access.hpp new file mode 100644 index 000000000..2a644343f --- /dev/null +++ b/src/uarch-record-step-state-access.hpp @@ -0,0 +1,352 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . 
+// + +#ifndef UARCH_RECORD_STEP_STATE_ACCESS_HPP +#define UARCH_RECORD_STEP_STATE_ACCESS_HPP + +/// \file +/// \brief State access that records a uarch step into a binary step log file + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hash-tree-constants.hpp" +#include "hash-tree.hpp" +#include "i-accept-scoped-notes.hpp" +#include "i-prefer-shadow-uarch-state.hpp" +#include "i-uarch-state-access.hpp" +#include "machine-hash.hpp" +#include "machine-reg.hpp" +#include "machine.hpp" +#include "os-filesystem.hpp" +#include "os.hpp" +#include "scoped-note.hpp" +#include "shadow-tlb.hpp" +#include "shadow-uarch-state.hpp" +#include "step-log.hpp" +#include "uarch-constants.hpp" +#include "unique-c-ptr.hpp" + +namespace cartesi { + +/// \class uarch_record_step_state_access +/// \brief Records a single uarch step into a binary step log file +class uarch_record_step_state_access : + public i_uarch_state_access, + public i_accept_scoped_notes, + public i_prefer_shadow_uarch_state { + + using page_data_type = std::array; + using pages_type = std::map; + using sibling_hashes_type = hash_tree::sibling_hashes_type; + using page_indices_type = std::vector; + using nodes_type = std::map; + +public: + struct context { + /// \brief Constructor of uarch record step state access context + /// \param filename where to save the log + /// \param hash_function hash function type to use for the log + explicit context(std::string filename, hash_function_type hash_function) : + filename(std::move(filename)), + hash_function(hash_function) { + ; + } + std::string filename; ///< Where to save the log + hash_function_type hash_function; ///< Hash function type to use for the log + mutable pages_type touched_pages; ///< Copy of all pages touched during execution + mutable nodes_type touched_nodes; ///< Subtrees touched during execution + }; + +private: + // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) + context &m_context; ///< 
Context for the recording + machine &m_m; ///< Reference to machine + // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) + +public: + /// \brief Constructor of uarch record step state access + /// \param context Context for the recording with the log filename + /// \param m reference to machine + /// \details The log file is saved when finish() is called + uarch_record_step_state_access(context &context, machine &m) : m_context(context), m_m(m) { + if (os::exists(m_context.filename)) { + throw std::runtime_error("file already exists"); + } + } + + /// \brief Finish recording and save the log file + /// \param root_hash_before Root hash before the step + /// \param requested_cycle_count Cycles requested by the caller (uarch_cycle delta for log_step_uarch, + /// 0 for log_reset_uarch which is not a cycle-running operation) + /// \param root_hash_after Root hash after the step + void finish(const machine_hash &root_hash_before, uint64_t requested_cycle_count, + const machine_hash &root_hash_after) { + // Fill in hash_after for each node. The machine's hash tree was refreshed + // by the outer get_root_hash call, so skip_hash_tree_update is safe. 
+ for (auto &[address, entry] : m_context.touched_nodes) { + entry.hash_after = m_m.get_node_hash(address, static_cast(entry.log2_size), skip_hash_tree_update); + } + auto sibling_hashes = get_sibling_hashes(); + + const step_log_header header{ + .signature = STEP_LOG_SIGNATURE, + .root_hash_before = root_hash_before, + .requested_cycle_count = requested_cycle_count, + .root_hash_after = root_hash_after, + .hash_function = static_cast(m_context.hash_function), + .page_count = m_context.touched_pages.size(), + .node_count = m_context.touched_nodes.size(), + .sibling_count = sibling_hashes.size(), + }; + auto fp = make_unique_fopen(m_context.filename.c_str(), "wb"); + if (fwrite(&header, sizeof(header), 1, fp.get()) != 1) { + throw std::runtime_error("Could not write header to log file"); + } + for (const auto &[address, data] : m_context.touched_pages) { + page_entry entry{ + .index = address >> HASH_TREE_LOG2_PAGE_SIZE, + .data = {}, + .hash = {}, // scratch; replayer fills this in from the data + }; + std::copy_n(data.data(), data.size(), entry.data); + if (fwrite(&entry, sizeof(entry), 1, fp.get()) != 1) { + throw std::runtime_error("Could not write page entry to log file"); + } + } + for (const auto &[_, node] : m_context.touched_nodes) { + if (fwrite(&node, sizeof(node), 1, fp.get()) != 1) { + throw std::runtime_error("Could not write node entry to log file"); + } + } + if (!sibling_hashes.empty() && + fwrite(sibling_hashes.data(), sizeof(machine_hash), sibling_hashes.size(), fp.get()) != + sibling_hashes.size()) { + throw std::runtime_error("Could not write sibling hashes to log file"); + } + } + +private: + /// \brief Mark a page as touched and save its contents + /// \param address address inside the page + void touch_page(uint64_t address) const { + auto page = address & ~PAGE_OFFSET_MASK; + if (m_context.touched_pages.contains(page)) { + return; + } + + // get first node with starting address > page or end of map + auto node_it = 
m_context.touched_nodes.upper_bound(page); + if (node_it != m_context.touched_nodes.begin()) { + const auto prev_node_it = std::prev(node_it); + const auto prev_node_end = prev_node_it->first + (UINT64_C(1) << prev_node_it->second.log2_size); + // Reject if the page falls inside a previously recorded node's range. + if (prev_node_end > page) { + throw std::runtime_error("page falls inside a recorded node's range"); + } + } + auto [it, _] = m_context.touched_pages.emplace(page, page_data_type()); + m_m.read_memory(page, it->second.data(), it->second.size()); + } + + /// \brief Record that the subtree at (address, log2_size) is being touched. + /// \param address subtree start address (must be aligned to 2^log2_size) + /// \param log2_size log2 of the subtree size (must be > page size and <= root size) + /// \details Captures the subtree's current hash as hash_before. hash_after is + /// filled in during finish() once the machine's tree has been refreshed. + void touch_node(uint64_t address, int log2_size) const { + if (log2_size <= HASH_TREE_LOG2_PAGE_SIZE || log2_size > HASH_TREE_LOG2_ROOT_SIZE) { + throw std::runtime_error("node log2 size is out of range"); + } + const auto node_size = UINT64_C(1) << log2_size; + if ((address & (node_size - 1)) != 0) { + throw std::runtime_error("node address is not aligned to its size"); + } + const auto node_end = address + node_size; + // get first node with starting address >= address or end of map + auto next_node_it = m_context.touched_nodes.lower_bound(address); + // Reject if the next node starts inside this node's range. 
+ if (next_node_it != m_context.touched_nodes.end() && next_node_it->first < node_end) { + throw std::runtime_error("node overlaps an existing node"); + } + if (next_node_it != m_context.touched_nodes.begin()) { + const auto prev_node_it = std::prev(next_node_it); + const auto prev_node_end = prev_node_it->first + (UINT64_C(1) << prev_node_it->second.log2_size); + // Reject if the previous node's range extends into this node's range. + if (prev_node_end > address) { + throw std::runtime_error("node overlaps an existing node"); + } + } + // get first page with starting address >= address or end of map + auto next_page_it = m_context.touched_pages.lower_bound(address); + // Reject if any existing page lies inside the node's range. + if (next_page_it != m_context.touched_pages.end() && next_page_it->first < node_end) { + throw std::runtime_error("node would enclose an existing page"); + } + m_context.touched_nodes.emplace(address, + node_entry{ + .address = address, + .log2_size = static_cast(log2_size), + .hash_before = m_m.get_node_hash(address, log2_size, skip_hash_tree_update), + .hash_after = {}, // filled in by finish() after the outer get_root_hash() refreshes the tree + }); + } + + /// \brief Get the sibling hashes of all touched pages and nodes + /// \details Walks the tree with three cursors (pages, nodes, siblings). + /// A subtree that exactly matches a recorded node is consumed as a node + /// (no sibling emitted). Subtrees with no touched content emit one sibling. 
+ sibling_hashes_type get_sibling_hashes() { + sibling_hashes_type sibling_hashes{}; + page_indices_type page_indices{}; + for (const auto &[address, _] : m_context.touched_pages) { + page_indices.push_back(address >> HASH_TREE_LOG2_PAGE_SIZE); + } + auto next_page_index = page_indices.cbegin(); + auto next_node_it = m_context.touched_nodes.cbegin(); + get_sibling_hashes_impl(0, HASH_TREE_LOG2_ROOT_SIZE - HASH_TREE_LOG2_PAGE_SIZE, page_indices, next_page_index, + next_node_it, sibling_hashes); + if (next_page_index != page_indices.cend()) { + throw std::runtime_error("get_sibling_hashes failed to consume all pages"); + } + if (next_node_it != m_context.touched_nodes.cend()) { + throw std::runtime_error("get_sibling_hashes failed to consume all nodes"); + } + return sibling_hashes; + } + + /// \brief Recursively collect sibling hashes for the subtree rooted at page_index + /// \param page_index Index of the first page in the subtree + /// \param page_count_log2_size Log2 of the number of pages in the subtree + /// \param page_indices All touched page indices, sorted ascending + /// \param next_page_index Cursor into page_indices; advances past each page consumed during recursion + /// \param next_node_it Cursor into touched_nodes; advances past each node consumed during recursion + /// \param sibling_hashes Accumulates sibling hashes for untouched subtrees + void get_sibling_hashes_impl(uint64_t page_index, int page_count_log2_size, page_indices_type &page_indices, + page_indices_type::const_iterator &next_page_index, nodes_type::const_iterator &next_node_it, + sibling_hashes_type &sibling_hashes) { + const auto page_count = UINT64_C(1) << page_count_log2_size; + const auto subtree_start_addr = page_index << HASH_TREE_LOG2_PAGE_SIZE; + const auto subtree_log2_size = page_count_log2_size + HASH_TREE_LOG2_PAGE_SIZE; + const auto subtree_end_page_index = page_index + page_count; + + // is the next unconsumed page inside this subtree? 
+ const bool page_in = next_page_index != page_indices.cend() && *next_page_index < subtree_end_page_index; + // is the next unconsumed node inside this subtree? + const bool node_in = next_node_it != m_context.touched_nodes.cend() && + (next_node_it->first >> HASH_TREE_LOG2_PAGE_SIZE) < subtree_end_page_index; + + if (!page_in && !node_in) { + sibling_hashes.push_back(m_m.get_node_hash(subtree_start_addr, subtree_log2_size, skip_hash_tree_update)); + } else if (node_in && next_node_it->first == subtree_start_addr && + next_node_it->second.log2_size == static_cast(subtree_log2_size)) { + ++next_node_it; + } else if (page_count_log2_size > 0) { + get_sibling_hashes_impl(page_index, page_count_log2_size - 1, page_indices, next_page_index, next_node_it, + sibling_hashes); + get_sibling_hashes_impl(page_index + (UINT64_C(1) << (page_count_log2_size - 1)), page_count_log2_size - 1, + page_indices, next_page_index, next_node_it, sibling_hashes); + } else { + ++next_page_index; + } + } + + // ----- + // i_prefer_shadow_uarch_state interface implementation + // ----- + friend i_prefer_shadow_uarch_state; + + uint64_t do_read_shadow_uarch_state(shadow_uarch_state_what what) const { + touch_page(static_cast(what)); + return m_m.read_reg(machine_reg_enum(what)); + } + + void do_write_shadow_uarch_state(shadow_uarch_state_what what, uint64_t val) const { + touch_page(static_cast(what)); + m_m.write_reg(machine_reg_enum(what), val); + } + + // ----- + // i_uarch_state_access interface implementation + // ----- + friend i_uarch_state_access; + + uint64_t do_read_word(uint64_t paddr) const { + touch_page(paddr); + return m_m.read_word(paddr); + } + + void do_write_word(uint64_t paddr, uint64_t val) const { + touch_page(paddr); + m_m.write_word(paddr, val); + } + + void do_write_tlb(TLB_set_index set_index, uint64_t slot_index, uint64_t vaddr_page, uint64_t vp_offset, + uint64_t pma_index) const { + touch_page(shadow_tlb_get_abs_addr(set_index, slot_index)); + 
m_m.write_unverified_tlb(set_index, slot_index, vaddr_page, vp_offset, pma_index); + } + + void do_reset_uarch() const { + touch_node(UARCH_STATE_START_ADDRESS, UARCH_STATE_LOG2_SIZE); + m_m.reset_uarch(); + } + + void do_revert_state() const { + // Witness the revert root hash leaf so the replayer can recover the canonical post-state + // hash from its page. The substitution into root_hash_after happens in machine::log_reset_uarch. + touch_page(AR_SHADOW_REVERT_ROOT_HASH_START); + } + + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) + bool do_putchar(uint8_t /*c*/) const { + return false; + } + + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) + constexpr const char *do_get_name() const { + return "uarch_record_step_state_access"; + } + + // ----- + // i_accept_scoped_notes interface implementation + // ----- + friend i_accept_scoped_notes; + + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) + void do_push_begin_bracket(const char * /*text*/) const { + ; + } + + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) + void do_push_end_bracket(const char * /*text*/) const { + ; + } + + auto do_make_scoped_note(const char *text) const { + return scoped_note{*this, text}; + } +}; + +} // namespace cartesi + +#endif diff --git a/src/uarch-replay-state-access.hpp b/src/uarch-replay-state-access.hpp deleted file mode 100644 index 88caf7a16..000000000 --- a/src/uarch-replay-state-access.hpp +++ /dev/null @@ -1,376 +0,0 @@ -// Copyright Cartesi and individual authors (see AUTHORS) -// SPDX-License-Identifier: LGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation, either version 3 of the License, or (at your option) any -// later version. 
-// -// This program is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License along -// with this program (see COPYING). If not, see . -// - -#ifndef UARCH_REPLAY_STATE_ACCESS_HPP -#define UARCH_REPLAY_STATE_ACCESS_HPP - -/// \file -/// \brief State access implementation that replays recorded state accesses - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "access-log.hpp" -#include "hash-tree-constants.hpp" -#include "hash-tree.hpp" -#include "i-accept-scoped-notes.hpp" -#include "i-hasher.hpp" -#include "i-prefer-shadow-uarch-state.hpp" -#include "i-uarch-state-access.hpp" -#include "keccak-256-hasher.hpp" -#include "machine-hash.hpp" -#include "machine-reg.hpp" -#include "machine.hpp" -#include "meta.hpp" -#include "shadow-tlb.hpp" -#include "shadow-uarch-state.hpp" -#include "uarch-constants.hpp" -#include "uarch-pristine-state-hash.hpp" - -namespace cartesi { - -class uarch_replay_state_access : - public i_uarch_state_access, - public i_accept_scoped_notes, - public i_prefer_shadow_uarch_state { - - using proof_type = hash_tree::proof_type; - -public: - struct context { - /// \brief Constructor replay_send_cmio_state_access context - /// \param log Access log to be replayed - /// \param initial_hash Initial root hash - context(const access_log &log, const machine_hash &initial_hash) : - accesses(log.get_accesses()), - root_hash(initial_hash) { - ; - } - const std::vector &accesses; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) - ///< Index of next access to ne consumed - unsigned int next_access{}; - ///< Root hash before next access - machine_hash root_hash; - ///< Hasher needed to verify proofs - keccak_256_hasher hasher; - }; - 
-private: - context &m_context; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) - -public: - /// \brief Constructor from log of word accesses. - explicit uarch_replay_state_access(uarch_replay_state_access::context &context) : m_context{context} { - ; - } - - void finish() { - if (m_context.next_access != m_context.accesses.size()) { - throw std::invalid_argument{"access log was not fully consumed"}; - } - } - - machine_hash get_root_hash() const { - return m_context.root_hash; - } - -private: - template - static auto get_hash(H &h, const access_data &data) { - machine_hash hash{}; - get_merkle_tree_hash(h, std::span{data.data(), data.size()}, HASH_TREE_WORD_SIZE, hash); - return hash; - } - - std::string access_to_report() const { - auto index = m_context.next_access + 1; - auto digit = index % 10; - const char *suffix = nullptr; - switch (digit) { - case 1: - suffix = "st"; - break; - case 2: - suffix = "nd"; - break; - case 3: - suffix = "rd"; - break; - default: - suffix = "th"; - break; - } - return std::to_string(index) + suffix + " access"; - } - - static constexpr const char *access_type_name(access_type type) { - switch (type) { - case access_type::read: - return "read"; - case access_type::write: - return "write"; - } - return "unknown_"; - } - - static std::pair adjust_access(uint64_t paddr, int log2_size) { - static_assert(cartesi::log2_size_v <= HASH_TREE_LOG2_WORD_SIZE, - "Hash tree word size must not be smaller than machine word size"); - const auto log2_word_size = HASH_TREE_LOG2_WORD_SIZE; - const auto log2_access_size = std::max(log2_size, log2_word_size); - const auto access_paddr = (paddr >> log2_access_size) << log2_access_size; - return {access_paddr, log2_access_size}; - } - - const access &check_access(const char *text) const { - if (m_context.next_access >= m_context.accesses.size()) { - throw std::invalid_argument{"log is missing access " + access_to_report() + " to " + text}; - } - return 
m_context.accesses[m_context.next_access]; - } - - void check_access_type(const access &a, access_type type, const char *text) const { - if (a.get_type() != type) { - throw std::invalid_argument{ - "expected " + access_to_report() + " to " + access_type_name(type) + " " + text}; - } - if (type == access_type::read) { - if (a.get_written().has_value()) { - throw std::invalid_argument{ - "unexpected written data in " + access_to_report() + " read access to " + text}; - } - if (a.get_written_hash().has_value()) { - throw std::invalid_argument{ - "unexpected written hash in " + access_to_report() + " read access to " + text}; - } - } - } - - void check_access_range(const access &a, access_type type, uint64_t paddr, uint64_t log2_size, - const char *text) const { - if (a.get_address() != paddr) { - std::ostringstream err; - err << "expected " << access_to_report() << " to " << access_type_name(type) << " " << text - << " at address 0x" << std::hex << paddr << "(" << std::dec << paddr << ")"; - throw std::invalid_argument{err.str()}; - } - if (a.get_log2_size() != static_cast(log2_size)) { - throw std::invalid_argument{"expected " + access_to_report() + " to " + text + " to " + - access_type_name(type) + " 2^" + std::to_string(log2_size) + " bytes"}; - } - } - - auto check_access_siblings_and_read_hash(const access &a, const char *text) const { - const auto proof = a.make_proof(m_context.root_hash); - if (!proof.verify(m_context.hasher)) { - throw std::invalid_argument{ - "siblings and read hash do not match root hash before " + access_to_report() + " to " + text}; - } - return proof; - } - - const auto &check_written_hash(const access &a, const machine_hash &expected_hash, const char *text) const { - if (!a.get_written_hash().has_value()) { - throw std::invalid_argument{"missing written hash of " + std::string(text) + " in " + access_to_report()}; - } - // NOLINTBEGIN(bugprone-unchecked-optional-access) - if (a.get_written_hash().value() != expected_hash) { - throw 
std::invalid_argument{ - "written hash for " + std::string(text) + " does not match expected hash in " + access_to_report()}; - } - return a.get_written_hash().value(); - // NOLINTEND(bugprone-unchecked-optional-access) - } - - const auto &check_read_data(const access &a, const char *text) const { - if (!a.get_read().has_value()) { - throw std::invalid_argument{"missing read data for " + std::string(text) + " in " + access_to_report()}; - } - // check if logged read data hashes to the logged read hash - // NOLINTBEGIN(bugprone-unchecked-optional-access) - const auto computed_read_hash = get_hash(m_context.hasher, a.get_read().value()); - if (a.get_read_hash() != computed_read_hash) { - throw std::invalid_argument{ - "read data for " + std::string(text) + " does not match read hash in " + access_to_report()}; - } - return a.get_read().value(); - // NOLINTEND(bugprone-unchecked-optional-access) - } - - void check_written_data_if_there(const access &a, const machine_hash &written_hash, const char *text) const { - if (!a.get_written().has_value()) { - return; - } - // NOLINTBEGIN(bugprone-unchecked-optional-access) - if (written_hash != get_hash(m_context.hasher, a.get_written().value())) { - throw std::invalid_argument{ - "written data for " + std::string(text) + " does not match written hash in " + access_to_report()}; - } - // NOLINTEND(bugprone-unchecked-optional-access) - } - - void check_read_data_if_there(const access &a, const char *text) const { - if (!a.get_read().has_value()) { - return; - } - // NOLINTBEGIN(bugprone-unchecked-optional-access) - if (a.get_read_hash() != get_hash(m_context.hasher, a.get_read().value())) { - throw std::invalid_argument{ - "read data for " + std::string(text) + " does not match read hash in " + access_to_report()}; - } - // NOLINTEND(bugprone-unchecked-optional-access) - } - - void update_root_hash(const proof_type &proof, const machine_hash &written_hash) const { - m_context.root_hash = proof.bubble_up(m_context.hasher, 
written_hash); - } - - void check_write_access(uint64_t paddr, uint64_t log2_size, const machine_hash &expected_hash, - const char *text) const { - const auto &a = check_access(text); - check_access_type(a, access_type::write, text); - check_access_range(a, access_type::write, paddr, log2_size, text); - const auto proof = check_access_siblings_and_read_hash(a, text); - const auto &written_hash = check_written_hash(a, expected_hash, text); - check_read_data_if_there(a, text); - check_written_data_if_there(a, written_hash, text); - update_root_hash(proof, written_hash); - m_context.next_access++; - } - - void check_write_word_access(uint64_t paddr, uint64_t val, const char *text) const { - const auto log2_size = log2_size_v; - const auto &a = check_access(text); - check_access_type(a, access_type::write, text); - check_access_range(a, access_type::write, paddr, log2_size, text); - const auto proof = check_access_siblings_and_read_hash(a, text); - auto written_data = check_read_data(a, text); - [[maybe_unused]] const auto [access_paddr, access_log2_size] = adjust_access(paddr, log2_size); - const auto val_offset = paddr - access_paddr; - replace_word_access_data(val, written_data, val_offset); - const auto &written_hash = check_written_hash(a, get_hash(m_context.hasher, written_data), text); - check_written_data_if_there(a, written_hash, text); - update_root_hash(proof, written_hash); - m_context.next_access++; - } - - void check_write_reg_access(machine_reg reg, uint64_t val) const { - check_write_word_access(machine_reg_address(reg), val, machine_reg_get_name(reg)); - } - - uint64_t check_read_word_access(uint64_t paddr, const char *text) const { - const auto log2_size = log2_size_v; - const auto &a = check_access(text); - check_access_type(a, access_type::read, text); - check_access_range(a, access_type::read, paddr, log2_size, text); - std::ignore = check_access_siblings_and_read_hash(a, text); - const auto &read_data = check_read_data(a, text); - [[maybe_unused]] 
const auto [access_paddr, access_log2_size] = adjust_access(paddr, log2_size); - const auto val_offset = paddr - access_paddr; - const auto val = get_word_access_data(read_data, val_offset); - m_context.next_access++; - return val; - } - - uint64_t check_read_reg_access(machine_reg reg) const { - return check_read_word_access(machine_reg_address(reg), machine_reg_get_name(reg)); - } - - auto get_write_tlb_slot_hash(uint64_t vaddr_page, uint64_t vp_offset, uint64_t pma_index) const { - // Writes to TLB slots have to be atomic. - // We can only do atomic writes of entire hash tree nodes. - // Therefore, TLB slot must have a power-of-two size, or at least be aligned to it. - static_assert(SHADOW_TLB_SLOT_SIZE == sizeof(shadow_tlb_slot), "shadow TLB slot size is wrong"); - static_assert((UINT64_C(1) << SHADOW_TLB_SLOT_LOG2_SIZE) == SHADOW_TLB_SLOT_SIZE, - "shadow TLB slot log2 size is wrong"); - static_assert(SHADOW_TLB_SLOT_LOG2_SIZE >= HASH_TREE_LOG2_WORD_SIZE, - "shadow TLB slot must fill at least an entire hash tree word"); - shadow_tlb_slot slot_data{}; - shadow_tlb_fill_slot(vaddr_page, vp_offset, pma_index, slot_data); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - const std::span slot_data_span{reinterpret_cast(&slot_data), sizeof(slot_data)}; - return get_merkle_tree_hash(m_context.hasher, slot_data_span, HASH_TREE_WORD_SIZE); - } - - // ----- - // i_prefer_shadow_uarch_state interface implementation - // ----- - friend i_prefer_shadow_uarch_state; - - uint64_t do_read_shadow_uarch_state(shadow_uarch_state_what what) const { - return check_read_reg_access(machine_reg_enum(what)); - } - - void do_write_shadow_uarch_state(shadow_uarch_state_what what, uint64_t val) const { - check_write_reg_access(machine_reg_enum(what), val); - } - - // ----- - // i_uarch_state_access interface implementation - // ----- - friend i_uarch_state_access; - - uint64_t do_read_word(uint64_t paddr) const { - return check_read_word_access(paddr, 
machine::get_what_name(paddr)); - } - - void do_write_word(uint64_t paddr, uint64_t val) const { - check_write_word_access(paddr, val, machine::get_what_name(paddr)); - } - - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool do_putchar(uint8_t /*c*/) const { - return false; - } - - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - void do_mark_dirty_page(uint64_t /*paddr*/, uint64_t /*pma_index*/) const { - ; // do nothing - } - - void do_reset_uarch() const { - check_write_access(UARCH_STATE_START_ADDRESS, UARCH_STATE_LOG2_SIZE, get_uarch_pristine_state_hash(), - "uarch.state"); - } - - void do_write_tlb(TLB_set_index set_index, uint64_t slot_index, uint64_t vaddr_page, uint64_t vp_offset, - uint64_t pma_index) const { - const auto slot_paddr = shadow_tlb_get_abs_addr(set_index, slot_index); - const auto slot_hash = get_write_tlb_slot_hash(vaddr_page, vp_offset, pma_index); - check_write_access(slot_paddr, SHADOW_TLB_SLOT_LOG2_SIZE, slot_hash, "tlb.slot"); - } - - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - constexpr const char *do_get_name() const { - return "uarch_replay_state_access"; - } - - // ----- - // i_accept_scoped_notes interface implementation - // ----- - friend i_accept_scoped_notes; -}; - -} // namespace cartesi - -#endif diff --git a/src/uarch-replay-step-state-access.hpp b/src/uarch-replay-step-state-access.hpp new file mode 100644 index 000000000..0d7e89270 --- /dev/null +++ b/src/uarch-replay-step-state-access.hpp @@ -0,0 +1,225 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. 
+// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef UARCH_REPLAY_STEP_STATE_ACCESS_HPP +#define UARCH_REPLAY_STEP_STATE_ACCESS_HPP + +/// \file +/// \brief State access that replays a uarch step from a binary step log file + +#include +#include +#include + +#include "address-range-constants.hpp" +#include "compiler-defines.hpp" +#include "host-addr.hpp" +#include "i-accept-scoped-notes.hpp" +#include "i-prefer-shadow-uarch-state.hpp" +#include "i-uarch-state-access.hpp" +#include "machine-hash.hpp" +#include "machine-reg.hpp" +#include "scoped-note.hpp" +#include "shadow-tlb.hpp" +#include "shadow-uarch-state.hpp" +#include "step-log.hpp" +#include "strict-aliasing.hpp" +#include "throw.hpp" +#include "uarch-constants.hpp" +#include "uarch-pristine-state-hash.hpp" + +namespace cartesi { + +/// \brief No-op printout sink: the default replay produces no printout and pulls in no host-only +/// machinery, so the replay state access compiles for the freestanding (uarch/risc0) builds. The +/// real, allocating printer lives in step-pretty-printer.hpp and is selected only on the host. +struct no_step_printout { + void begin_bracket(const char * /*text*/) const {} + void end_bracket(const char * /*text*/) const {} + void read(const char * /*name*/, uint64_t /*paddr*/, uint64_t /*val*/) const {} + void write(const char * /*name*/, uint64_t /*paddr*/, uint64_t /*old_val*/, uint64_t /*new_val*/) const {} +}; + +/// \brief Provides machine state from a uarch step binary log +/// \tparam Printer Printout sink for the replay. 
Defaults to no_step_printout, which compiles to +/// nothing; the host selects step_pretty_printer to obtain a human-readable dump of the replay. +template +class uarch_replay_step_state_access : + public i_uarch_state_access>, + public i_accept_scoped_notes>, + public i_prefer_shadow_uarch_state> { +public: + struct context { + step_log log; ///< Parsed step log (witnessed tree) + bool reverted{false}; ///< Set when the reset reverted the state on a rejected input + machine_hash reverted_root_hash{}; ///< Canonical post-state hash when reverted (the revert root hash) + Printer printer{}; ///< Receives the printout; a no-op for the default no_step_printout + }; + +private: + context &m_context; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) + +public: + /// \brief Construct from a log image + /// \param context Context to be filled with replay step log data + /// \param log_image Pointer to the step log file bytes + /// \param log_size Size of the log bytes + /// \throw runtime_error if the log is malformed or the initial root hash does not match + uarch_replay_step_state_access(context &context, unsigned char *log_image, uint64_t log_size) : m_context(context) { + m_context.log = step_log::decode(log_image, log_size); + } + + /// \brief Finish the replay and check the final machine root hash + void finish() { + // When the reset reverted the state on a rejected input, the canonical root hash after the + // step is the recorded revert root hash, not the recomputed tree root (which reflects the + // pristine uarch). + machine_hash expected_final_root_hash{}; + if (m_context.reverted) { + // Revert substitutes the recorded root instead of recomputing it; still assert no node was + // left unconsumed (compute_root_hash makes this assertion on the non-reverted path). 
+ m_context.log.check_all_nodes_consumed(); + expected_final_root_hash = m_context.reverted_root_hash; + } else { + expected_final_root_hash = m_context.log.compute_root_hash(true); + } + if (expected_final_root_hash != m_context.log.root_hash_after) { + THROW(std::runtime_error, "final root hash mismatch"); + } + } + +private: + host_addr paddr_to_haddr(uint64_t paddr) const { + const auto paddr_page = paddr & ~PAGE_OFFSET_MASK; + auto *page_log = m_context.log.find_page(paddr_page); + const auto offset = paddr & PAGE_OFFSET_MASK; + return cast_ptr_to_host_addr(page_log->data) + offset; + } + + /// Like paddr_to_haddr, but invalidates the page's scratch hash so the after-root pass rehashes it. + host_addr paddr_to_haddr_for_write(uint64_t paddr) const { + const auto paddr_page = paddr & ~PAGE_OFFSET_MASK; + auto *page_log = m_context.log.find_page(paddr_page); + page_log->hash = machine_hash{}; + const auto offset = paddr & PAGE_OFFSET_MASK; + return cast_ptr_to_host_addr(page_log->data) + offset; + } + + // ----- + // i_prefer_shadow_uarch_state interface implementation + // ----- + friend i_prefer_shadow_uarch_state; + + uint64_t do_read_shadow_uarch_state(shadow_uarch_state_what what) const { + const auto paddr = static_cast(what); + const auto val = aliased_aligned_read(paddr_to_haddr(paddr)); + m_context.printer.read(shadow_uarch_state_get_what_name(what), paddr, val); + return val; + } + + void do_write_shadow_uarch_state(shadow_uarch_state_what what, uint64_t val) const { + const auto paddr = static_cast(what); + const auto haddr = paddr_to_haddr_for_write(paddr); + const auto old_val = aliased_aligned_read(haddr); + m_context.printer.write(shadow_uarch_state_get_what_name(what), paddr, old_val, val); + aliased_aligned_write(haddr, val); + } + + // ----- + // i_uarch_state_access interface implementation + // ----- + friend i_uarch_state_access; + + uint64_t do_read_word(uint64_t paddr) const { + const auto val = aliased_aligned_read(paddr_to_haddr(paddr)); 
+ m_context.printer.read(nullptr, paddr, val); + return val; + } + + void do_write_word(uint64_t paddr, uint64_t val) const { + const auto haddr = paddr_to_haddr_for_write(paddr); + const auto old_val = aliased_aligned_read(haddr); + m_context.printer.write(nullptr, paddr, old_val, val); + aliased_aligned_write(haddr, val); + } + + void do_write_tlb(TLB_set_index set_index, uint64_t slot_index, uint64_t vaddr_page, uint64_t vp_offset, + uint64_t pma_index) const { + const auto write_field = [this, set_index, slot_index](shadow_tlb_what what, uint64_t val) { + aliased_aligned_write( + paddr_to_haddr_for_write(shadow_tlb_get_abs_addr(set_index, slot_index, what)), val); + }; + write_field(shadow_tlb_what::vaddr_page, vaddr_page); + write_field(shadow_tlb_what::vp_offset, vp_offset); + write_field(shadow_tlb_what::pma_index, pma_index); + write_field(shadow_tlb_what::zero_padding_, UINT64_C(0)); + } + + void do_reset_uarch() const { + // The log must contain a node covering the full uarch state region + // whose post-state matches the well-known pristine uarch hash. + const auto *node = m_context.log.try_find_node(UARCH_STATE_START_ADDRESS); + if (node == nullptr || node->log2_size != static_cast(UARCH_STATE_LOG2_SIZE)) { + THROW(std::runtime_error, "reset uarch node not found in log"); + } + if (node->hash_after != get_uarch_pristine_state_hash()) { + THROW(std::runtime_error, "reset uarch node has wrong post-hash"); + } + m_context.log.consumed_node_count++; + } + + void do_revert_state() const { + // The canonical post-state hash is the recorded revert root hash, read raw from its leaf in the + // witnessed shadow page. finish() compares it, not the recomputed tree root, against the header. 
+ constexpr uint64_t paddr = AR_SHADOW_REVERT_ROOT_HASH_START; + const auto *page_log = m_context.log.find_page(paddr & ~PAGE_OFFSET_MASK); + std::copy_n(page_log->data + (paddr & PAGE_OFFSET_MASK), m_context.reverted_root_hash.size(), + m_context.reverted_root_hash.begin()); + m_context.reverted = true; + } + + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) + bool do_putchar(uint8_t /*c*/) const { + return false; + } + + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) + constexpr const char *do_get_name() const { + return "uarch_replay_step_state_access"; + } + + // ----- + // i_accept_scoped_notes interface implementation + // ----- + friend i_accept_scoped_notes; + + // A real scoped_note (not the no-op default) so per-instruction brackets reach the printer. + auto do_make_scoped_note(const char *text) const { + return scoped_note{*this, text}; + } + + void do_push_begin_bracket(const char *text) const { + m_context.printer.begin_bracket(text); + } + + void do_push_end_bracket(const char *text) const { + m_context.printer.end_bracket(text); + } +}; + +} // namespace cartesi + +#endif diff --git a/src/uarch-reset-state.cpp b/src/uarch-reset-state.cpp index 3977195f8..4bf1383e9 100644 --- a/src/uarch-reset-state.cpp +++ b/src/uarch-reset-state.cpp @@ -21,8 +21,10 @@ #include "uarch-reset-state.hpp" -#include "uarch-record-state-access.hpp" // IWYU pragma: keep -#include "uarch-replay-state-access.hpp" // IWYU pragma: keep +#include "htif-constants.hpp" +#include "uarch-constants.hpp" +#include "uarch-record-step-state-access.hpp" // IWYU pragma: keep +#include "uarch-replay-step-state-access.hpp" // IWYU pragma: keep #include "uarch-solidity-compat.hpp" #include "uarch-state-access.hpp" // IWYU pragma: keep @@ -31,16 +33,25 @@ namespace cartesi { template void uarch_reset_state(UarchState &a) { resetState(a); + // When the machine has rejected an input, the canonical state after the operation is + // the one recorded in the 
revert root hash (which has a pristine uarch) + uint64 iflagsY = readWord(a, IFLAGS_Y_ADDRESS); + if (iflagsY != 0) { + uint64 tohost = readWord(a, HTIF_TOHOST_ADDRESS); + if (isYieldedManualWith(tohost, HTIF_YIELD_MANUAL_REASON_RX_REJECTED)) { + revertState(a); + } + } } // Explicit instantiation for uarch_state_access template void uarch_reset_state(uarch_state_access &a); -// Explicit instantiation for uarch_record_state_access -template void uarch_reset_state(uarch_record_state_access &a); +// Explicit instantiation for uarch_record_step_state_access +template void uarch_reset_state(uarch_record_step_state_access &a); -// Explicit instantiation for uarch_replay_state_access -template void uarch_reset_state(uarch_replay_state_access &a); +// Explicit instantiation for uarch_replay_step_state_access +template void uarch_reset_state(uarch_replay_step_state_access &a); } // namespace cartesi // NOLINTEND(google-readability-casting,misc-const-correctness,modernize-use-auto,hicpp-use-auto) diff --git a/src/uarch-reset-state.hpp b/src/uarch-reset-state.hpp index 3302147d4..d9d5accae 100644 --- a/src/uarch-reset-state.hpp +++ b/src/uarch-reset-state.hpp @@ -26,20 +26,15 @@ template void uarch_reset_state(STATE_ACCESS &a); class uarch_state_access; -class uarch_record_state_access; -class uarch_replay_state_access; +class uarch_record_step_state_access; +struct no_step_printout; +template +class uarch_replay_step_state_access; -// Declaration of explicit instantiation in module uarch-reset-state.cpp +// Declaration of explicit instantiations in module uarch-reset-state.cpp extern template void uarch_reset_state(uarch_state_access &a); - -// Declaration of explicit instantiation in module uarch-reset-state.cpp -extern template void uarch_reset_state(uarch_state_access &a); - -// Declaration of explicit instantiation in module uarch-reset-state.cpp -extern template void uarch_reset_state(uarch_record_state_access &a); - -// Declaration of explicit instantiation in module 
uarch-reset-state.cpp -extern template void uarch_reset_state(uarch_replay_state_access &a); +extern template void uarch_reset_state(uarch_record_step_state_access &a); +extern template void uarch_reset_state(uarch_replay_step_state_access &a); } // namespace cartesi diff --git a/src/uarch-solidity-compat.hpp b/src/uarch-solidity-compat.hpp index 5ade287c3..c10e3ae0b 100644 --- a/src/uarch-solidity-compat.hpp +++ b/src/uarch-solidity-compat.hpp @@ -20,15 +20,18 @@ #include #include +#include "address-range-constants.hpp" #include "assert-printf.hpp" +#include "htif-constants.hpp" +#include "machine-hash.hpp" +#include "shadow-registers.hpp" #include "shadow-tlb.hpp" /// \file -/// \brief Solidity Compatibility Layer -/// \brief The purpose of this file is to facilitate porting the uarch instruction interpreter to Solidity. -/// \brief The uarch interpreter implementation uses functions from this file to perform operations not available -/// \brief or whose behavior differ in Solidity. -/// \brief Arithmetic overflow should never cause exceptions. +/// \brief Solidity compatibility layer for porting the uarch instruction interpreter to Solidity. +/// +/// The uarch interpreter uses these functions for operations that are unavailable or behave differently +/// in Solidity. Arithmetic overflow must never raise exceptions. namespace cartesi { @@ -42,6 +45,9 @@ using uint32 = uint32_t; using int64 = int64_t; using uint64 = uint64_t; using bytes = const unsigned char *; +// Solidity's bytes32. The transpiler leaves the name unchanged, mapping it directly to Solidity's +// native bytes32. 
+using bytes32 = const_machine_hash_view; // Wrapperfunctions used to access data from the uarch state accessor @@ -100,6 +106,11 @@ static inline void resetState(const UarchState a) { a.reset_uarch(); } +template +static inline void revertState(const UarchState a) { + a.revert_state(); +} + template static inline uint64 readIflagsY(State &a) { return a.read_iflags_Y(); @@ -115,6 +126,22 @@ static inline void writeHtifFromhost(State &a, uint64 val) { a.write_htif_fromhost(val); } +template +static inline uint64 readHtifTohost(State &a) { + return a.read_htif_tohost(); +} + +// The revert root hash is a 32-byte machine hash stored raw in its dedicated shadow slot. The page +// model hashes the bytes as-is, so the write must produce the same page bytes across all replayers. +static constexpr uint64 REVERT_ROOT_HASH_LENGTH = 32; + +template +static inline void writeRevertRootHash(State &a, bytes32 revertRootHash) { + // The step recorder supports padded-memory writes, not raw write_memory. A 32-byte (2^5) write + // fills the shadow slot exactly with no padding, landing the hash bytes in their page verbatim. 
+ a.write_memory_with_padding(AR_SHADOW_REVERT_ROOT_HASH_START, revertRootHash.data(), REVERT_ROOT_HASH_LENGTH, 5); +} + template static inline void writeMemoryWithPadding(State &a, uint64 paddr, bytes data, uint64_t data_length, int32 write_length_log2_size) { @@ -131,11 +158,6 @@ static inline void putCharECALL(const UarchState a, uint8 c) { a.putchar(c); } -template -static inline void markDirtyPageECALL(const UarchState a, uint64 paddr, uint64 pma_index) { - a.mark_dirty_page(paddr, pma_index); -} - template static inline void writeTlbECALL(const UarchState a, uint64 set_index, uint64 slot_index, uint64 vaddr_page, uint64 vp_offset, uint64 pma_index) { @@ -218,6 +240,13 @@ static inline uint32 uint32Log2(uint32 v) { return 31 - __builtin_clz(v); } +static inline bool isYieldedManualWith(uint64 tohost, uint64 yieldReason) { + const uint64 dev = uint64ShiftRight(tohost & HTIF_DEV_MASK, HTIF_DEV_SHIFT); + const uint64 cmd = uint64ShiftRight(tohost & HTIF_CMD_MASK, HTIF_CMD_SHIFT); + const uint64 reason = uint64ShiftRight(tohost & HTIF_REASON_MASK, HTIF_REASON_SHIFT); + return dev == HTIF_DEV_YIELD && cmd == HTIF_YIELD_CMD_MANUAL && reason == yieldReason; +} + template void require([[maybe_unused]] T1 condition, [[maybe_unused]] T2 message) { assert((condition) && (message)); diff --git a/src/uarch-state-access.hpp b/src/uarch-state-access.hpp index 43dd0cb57..4c44f66f3 100644 --- a/src/uarch-state-access.hpp +++ b/src/uarch-state-access.hpp @@ -95,12 +95,6 @@ class uarch_state_access : return false; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - void do_mark_dirty_page(uint64_t paddr, uint64_t pma_index) const { - // Forward to machine - m_m.mark_dirty_page(paddr, pma_index); - } - void do_write_tlb(TLB_set_index set_index, uint64_t slot_index, uint64_t vaddr_page, uint64_t vp_offset, uint64_t pma_index) const { // Forward to machine @@ -112,6 +106,11 @@ class uarch_state_access : m_m.reset_uarch(); } + // 
NOLINTNEXTLINE(readability-convert-member-functions-to-static) + void do_revert_state() const { + ; // the physical machine state does not revert + } + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) constexpr const char *do_get_name() const { return "uarch_state_access"; diff --git a/src/uarch-step.cpp b/src/uarch-step.cpp index 0c73e7c44..0f0d740d7 100644 --- a/src/uarch-step.cpp +++ b/src/uarch-step.cpp @@ -22,8 +22,9 @@ #include "uarch-step.hpp" #include "collect-uarch-cycle-hashes-state-access.hpp" // IWYU pragma: keep -#include "uarch-record-state-access.hpp" // IWYU pragma: keep -#include "uarch-replay-state-access.hpp" // IWYU pragma: keep +#include "step-pretty-printer.hpp" // IWYU pragma: keep +#include "uarch-record-step-state-access.hpp" // IWYU pragma: keep +#include "uarch-replay-step-state-access.hpp" // IWYU pragma: keep #include "uarch-state-access.hpp" // IWYU pragma: keep #include "uarch-constants.hpp" @@ -880,12 +881,6 @@ static inline void executeECALL(const UarchState a, uint32 insn, uint64 pc) { putCharECALL(a, uint8(c)); // Can be a NOOP in Solidity return advancePc(a, pc); } - if (fn == UARCH_ECALL_FN_MARK_DIRTY_PAGE) { - uint64 paddr = readX(a, 10); // a0 contains physical address in page to be marked dirty - uint64 pma_index = readX(a, 11); // a1 contains a index of PMA where page falls - markDirtyPageECALL(a, paddr, pma_index); // This MUST be be a NOOP in Solidity - return advancePc(a, pc); - } if (fn == UARCH_ECALL_FN_WRITE_TLB) { uint64 set_index = readX(a, 10); // a0 contains TLB set (code, read, write) uint64 slot_index = readX(a, 11); // a1 contains slot_index to modify @@ -1093,7 +1088,7 @@ static inline void executeInsn(const UarchState a, uint32 insn, uint64 pc) { template UArchStepStatus uarch_step(const UarchState a) { - // This must be the first read in order to match the first log access in machine::verify_step_uarch + // Read the cycle first so the overflow guard below runs before any state is mutated 
uint64 cycle = readCycle(a); // do not advance if cycle will overflow if (cycle >= UARCH_CYCLE_MAX) { @@ -1115,14 +1110,15 @@ UArchStepStatus uarch_step(const UarchState a) { // Explicit instantiation for uarch_state_access template UArchStepStatus uarch_step(const uarch_state_access a); -// Explicit instantiation for uarch_record_state_access -template UArchStepStatus uarch_step(const uarch_record_state_access a); - -// Explicit instantiation for uarch_replay_state_access -template UArchStepStatus uarch_step(const uarch_replay_state_access a); - // Explicit instantiation for collect_uarch_cycle_hashes_state_access template UArchStepStatus uarch_step(const collect_uarch_cycle_hashes_state_access a); +// Explicit instantiation for uarch_record_step_state_access +template UArchStepStatus uarch_step(const uarch_record_step_state_access a); + +// Explicit instantiation for uarch_replay_step_state_access (replay/verify and the host printout) +template UArchStepStatus uarch_step(const uarch_replay_step_state_access a); +template UArchStepStatus uarch_step(const uarch_replay_step_state_access a); + } // namespace cartesi // NOLINTEND(google-readability-casting,misc-const-correctness,modernize-use-auto,hicpp-use-auto) diff --git a/src/uarch-step.hpp b/src/uarch-step.hpp index af38fbffe..b17e526ec 100644 --- a/src/uarch-step.hpp +++ b/src/uarch-step.hpp @@ -36,14 +36,18 @@ UArchStepStatus uarch_step(STATE_ACCESS a); // Forward declarations class uarch_state_access; class collect_uarch_cycle_hashes_state_access; -class uarch_record_state_access; -class uarch_replay_state_access; +class uarch_record_step_state_access; +struct no_step_printout; +class step_pretty_printer; +template +class uarch_replay_step_state_access; // Declaration of explicit instantiations in module uarch-step.cpp extern template UArchStepStatus uarch_step(uarch_state_access a); extern template UArchStepStatus uarch_step(collect_uarch_cycle_hashes_state_access a); -extern template UArchStepStatus 
uarch_step(uarch_record_state_access a); -extern template UArchStepStatus uarch_step(uarch_replay_state_access a); +extern template UArchStepStatus uarch_step(uarch_record_step_state_access a); +extern template UArchStepStatus uarch_step(uarch_replay_step_state_access a); +extern template UArchStepStatus uarch_step(uarch_replay_step_state_access a); } // namespace cartesi diff --git a/tests/Dockerfile b/tests/Dockerfile index f6d6a43b4..fe492dbc0 100644 --- a/tests/Dockerfile +++ b/tests/Dockerfile @@ -21,7 +21,6 @@ ENV CARTESI_IMAGES_PATH=/usr/share/cartesi-machine/tests/data/images ENV CARTESI_TESTS_PATH=/usr/share/cartesi-machine/tests/data/machine ENV CARTESI_TESTS_UARCH_PATH=/usr/share/cartesi-machine/tests/data/uarch ENV CARTESI_CMIO_PATH=/tmp/cartesi-machine/tests/data/cmio -ENV CARTESI_STEP_LOGS_PATH=/tmp/cartesi-machine/tests/data/step-logs USER root diff --git a/tests/Makefile b/tests/Makefile index 92f1bc592..b5bd3e373 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -31,13 +31,11 @@ TARGETS_WITH_TOOLCHAIN = $(addsuffix -with-toolchain,build-tests-machine build-t CARTESI_IMAGES_PATH ?= $(abspath ./build/images) CARTESI_CMIO_PATH ?= $(abspath ./build/cmio) -CARTESI_STEP_LOGS_PATH ?= $(abspath ./build/step-logs) CARTESI_TESTS_PATH ?= $(abspath ./build/machine) CARTESI_TESTS_UARCH_PATH ?= $(abspath ./build/uarch) export CARTESI_IMAGES_PATH export CARTESI_CMIO_PATH -export CARTESI_STEP_LOGS_PATH export CARTESI_TESTS_PATH export CARTESI_TESTS_UARCH_PATH @@ -90,7 +88,7 @@ CARTESI_IMAGES = $(CARTESI_IMAGES_PATH)/linux.bin STYLUA=stylua STYLUA_FLAGS=--indent-type Spaces --respect-ignores -build-tests-all: misc build-tests-machine-with-toolchain build-tests-uarch-with-toolchain images $(CARTESI_CMIO_PATH) $(CARTESI_STEP_LOGS_PATH) +build-tests-all: misc build-tests-machine-with-toolchain build-tests-uarch-with-toolchain images $(CARTESI_CMIO_PATH) build-tests-machine: machine riscv-tests @@ -193,13 +191,6 @@ clean-machines: @rm -rf $(CARTESI_CMIO_PATH) 
-create-step-logs $(CARTESI_STEP_LOGS_PATH): | $(CARTESI_IMAGES) - $(LUA) ./lua/create-step-logs.lua - $(LUA) ./lua/cartesi-machine-tests.lua --jobs=$(NUM_JOBS) --hash-function=sha256 --save-step-logs=$(CARTESI_STEP_LOGS_PATH) run_step - -clean-step-logs: - @rm -rf $(CARTESI_STEP_LOGS_PATH) - test-cmio: | $(CARTESI_CMIO_PATH) @./scripts/test-cmio.sh ../src/cartesi-jsonrpc-machine '$(LUA) ../src/cartesi-machine.lua' @@ -245,10 +236,6 @@ test-fuzz: build-tests-fuzz test-misc: test-c-api test-save-and-load test-yield-and-save -test-generate-uarch-logs: $(BUILDDIR)/uarch-riscv-tests-json-logs - $(LUA) ./lua/uarch-riscv-tests.lua --output-dir=$(BUILDDIR)/uarch-riscv-tests-json-logs --create-reset-uarch-log --create-send-cmio-response-log --jobs=$(NUM_JOBS) json-step-logs - tar -czf uarch-riscv-tests-json-logs.tar.gz -C $(BUILDDIR) uarch-riscv-tests-json-logs - test-interactive: echo uname -a | $(LUA) ../src/cartesi-machine.lua --quiet --no-init-splash -it sh | grep -q "riscv64 GNU/Linux" echo uname -a | $(LUA) ../src/cartesi-machine.lua --quiet --no-init-splash -i sh | grep -q "riscv64 GNU/Linux" diff --git a/tests/fuzz/fuzz-interpret-step.cpp b/tests/fuzz/fuzz-interpret-step.cpp index 0c402fe5a..a340c0b91 100644 --- a/tests/fuzz/fuzz-interpret-step.cpp +++ b/tests/fuzz/fuzz-interpret-step.cpp @@ -123,6 +123,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { const auto dir3 = tmpdir.sub("m3"); const auto dir4 = tmpdir.sub("m4"); const auto log_file = tmpdir.sub("step.log"); + const auto uarch_log_file = tmpdir.sub("step_uarch.log"); + const auto reset_log_file = tmpdir.sub("reset_uarch.log"); // Store machine state to disk so we can clone it if (cm_store(m0, store_dir.c_str(), CM_SHARING_ALL) != CM_ERROR_OK) { @@ -175,12 +177,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { cm_hash hb{}; cm_hash ha{}; cm_get_root_hash(m3, &hb); - const char *log = nullptr; - if (cm_log_step_uarch(m3, 
CM_ACCESS_LOG_TYPE_LARGE_DATA, &log) != CM_ERROR_OK) { + // log_step_uarch requires the target file to not exist + std::filesystem::remove(uarch_log_file); + if (cm_log_step_uarch(m3, 1, uarch_log_file.c_str(), nullptr) != CM_ERROR_OK) { fuzz_abort("cm_log_step_uarch failed"); } cm_get_root_hash(m3, &ha); - if (cm_verify_step_uarch(m3, &hb, log, &ha) != CM_ERROR_OK) { + if (cm_verify_step_uarch(m3, &hb, uarch_log_file.c_str(), 1, &ha) != CM_ERROR_OK) { fuzz_abort("cm_verify_step_uarch failed"); } uint64_t halt = 0; @@ -199,12 +202,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { cm_hash hb{}; cm_hash ha{}; cm_get_root_hash(m3, &hb); - const char *log = nullptr; - if (cm_log_reset_uarch(m3, CM_ACCESS_LOG_TYPE_LARGE_DATA, &log) != CM_ERROR_OK) { + std::filesystem::remove(reset_log_file); + if (cm_log_reset_uarch(m3, reset_log_file.c_str()) != CM_ERROR_OK) { fuzz_abort("cm_log_reset_uarch failed"); } cm_get_root_hash(m3, &ha); - if (cm_verify_reset_uarch(m3, &hb, log, &ha) != CM_ERROR_OK) { + if (cm_verify_reset_uarch(m3, &hb, reset_log_file.c_str(), &ha) != CM_ERROR_OK) { fuzz_abort("cm_verify_reset_uarch failed"); } } diff --git a/tests/lua/cartesi-machine-tests.lua b/tests/lua/cartesi-machine-tests.lua index 77841e166..438852f5a 100755 --- a/tests/lua/cartesi-machine-tests.lua +++ b/tests/lua/cartesi-machine-tests.lua @@ -21,6 +21,7 @@ local util = require("cartesi.util") local test_util = require("cartesi.tests.util") local tabular = require("cartesi.tabular") local parallel = require("cartesi.parallel") +local manifest_mod = require("cartesi.tests.step_log_manifest") local jsonrpc -- Tests Cases @@ -295,10 +296,11 @@ local riscv_tests = { { "compressed.bin", 410 }, { "thrash-tlb.bin", 3363 }, { "thrash-tlb.bin", 3363, nil, { "pre-thrash-tlb.lua", "post-thrash-tlb.lua" } }, + -- worst-case step-log footprint generators (see step_max_pages*.S) + { "step_max_pages.bin", 123 }, + { "step_max_pages_flush.bin", 1037 }, } -local 
log_annotations = false - -- Microarchitecture configuration local uarch @@ -328,13 +330,10 @@ where options are: run N tests in parallel (default: 1, i.e., run tests sequentially) - --log-annotations - include annotations in logs - --periodic-action=[,] stop execution every of uarch cycles and perform action. If is given, the periodic action will start at that - uarch cycle. Only take effect with hash and step commands. + uarch cycle. Only take effect with the hash command. (default: none) --remote-address=: @@ -342,7 +341,7 @@ where options are: running a local cartesi machine. --output= - write the output of hash and step commands to the file at + write the output of the hash command to the file at . If the argument is not present the output is written to stdout. (default: none) @@ -370,6 +369,10 @@ and command can be: run_step run all tests by recording and verifying each test execution into a step log file + run_step_uarch + run all tests one uarch step at a time, recording and verifying each step into a + step log file, and check the final hash against a reference machine + run_uarch run test in the microarchitecture and report if payload and cycles match expected @@ -382,9 +385,6 @@ and command can be: hash output root hash at every of cycles - step - output json log of step at every of cycles - dump dump machine initial state memory ranges on current directory @@ -516,16 +516,6 @@ local options = { return true end, }, - { - "^%-%-log%-annotations$", - function(o) - if not o then - return false - end - log_annotations = true - return true - end, - }, { "^(%-%-periodic%-action%=(.*))$", function(all, v) @@ -862,89 +852,6 @@ local function print_machines(tests) end end -local function step(tests) - local out = io.stdout - if output then - out = assert(io.open(output, "w"), "error opening file: " .. 
output) - end - local indentout = util.indentout - local log_type = (log_annotations and cartesi.ACCESS_LOG_TYPE_ANNOTATIONS or 0) - out:write("[\n") - for i, test in ipairs(tests) do - local ram_image = test[1] - local expected_cycles = test[2] - local expected_payload = test[3] or 0 - local machine = build_machine(ram_image) - indentout(out, 1, "{\n") - indentout(out, 2, '"test": "%s",\n', ram_image) - if periodic_action then - indentout(out, 2, '"period": %u,\n', periodic_action_period) - indentout(out, 2, '"start": %u,\n', periodic_action_start) - end - indentout(out, 2, '"steps": [\n') - local total_logged_steps = 0 - local total_uarch_cycles = 0 - local max_mcycle = 2 * expected_cycles - while math.ult(machine:read_reg("mcycle"), max_mcycle) do - local uarch_cycle_increment = 0 - local next_action_uarch_cycle - if periodic_action then - next_action_uarch_cycle = periodic_action_start - if next_action_uarch_cycle <= total_uarch_cycles then - next_action_uarch_cycle = next_action_uarch_cycle - + ( - (((total_uarch_cycles - periodic_action_start) // periodic_action_period) + 1) - * periodic_action_period - ) - end - uarch_cycle_increment = next_action_uarch_cycle - total_uarch_cycles - end - local init_uarch_cycle = machine:read_reg("uarch_cycle") - machine:run_uarch(machine:read_reg("uarch_cycle") + uarch_cycle_increment) - local final_uarch_cycle = machine:read_reg("uarch_cycle") - total_uarch_cycles = total_uarch_cycles + (final_uarch_cycle - init_uarch_cycle) - if machine:read_reg("uarch_halt_flag") then - machine:reset_uarch() - if machine:read_reg("iflags_H") ~= 0 then - break - end - end - if not periodic_action or total_uarch_cycles == next_action_uarch_cycle then - local init_mcycle = machine:read_reg("mcycle") - init_uarch_cycle = machine:read_reg("uarch_cycle") - local log = machine:log_step_uarch(log_type) - local final_mcycle = machine:read_reg("mcycle") - final_uarch_cycle = machine:read_reg("uarch_cycle") - if total_logged_steps > 0 then - 
out:write(",\n") - end - util.dump_json_log(log, init_mcycle, init_uarch_cycle, final_mcycle, final_uarch_cycle, out, 3) - total_uarch_cycles = total_uarch_cycles + 1 - total_logged_steps = total_logged_steps + 1 - if machine:read_reg("uarch_halt_flag") then - machine:reset_uarch() - if machine:read_reg("iflags_H") ~= 0 then - break - end - end - end - end - indentout(out, 2, "]\n") - if tests[i + 1] then - indentout(out, 1, "},\n") - else - indentout(out, 1, "}\n") - end - if - machine:read_reg("htif_tohost_data") >> 1 ~= expected_payload - or machine:read_reg("mcycle") ~= expected_cycles - then - os.exit(1, true) - end - end - out:write("]\n") -end - local function dump(tests) local ram_image = tests[1][1] local machine = build_machine(ram_image) @@ -1047,6 +954,16 @@ local function run_host_and_uarch_machines(host_machine, uarch_machine, ctx, max return host_cycles end +-- A ram_image can be run both plain and with a prepost variant; give them distinct +-- fixture names so each saved log stays paired with its own manifest root hashes. +local function step_log_name(row) + local base = row.ram_image:match("^(.+)%.bin$") or row.ram_image + if row.prepost and row.prepost[1] then + base = base .. "-prepost" + end + return base +end + local function run_machine_step(machine, reference_machine, ctx, mcycle_count) local log_filename = os.tmpname() local delete_temp = true @@ -1075,14 +992,85 @@ local function run_machine_step(machine, reference_machine, ctx, mcycle_count) fatal("%s: failed. Final hash does not match reference machine\n", ctx.ram_image) end ctx.read_htif_tohost_data = machine:read_reg("htif_tohost_data") - -- save step log if requested + -- Save the step log plus a manifest fragment carrying the live-machine root + -- hashes (the replayer's Layer-2 source of truth). Fragments are merged after. 
if save_step_logs_dir then - local test_name = ctx.ram_image:match("^(.+)%.bin$") or ctx.ram_image + local test_name = step_log_name(ctx) local final_name = string.format("step-%s.log", test_name) - local final_path = save_step_logs_dir:gsub("/*$", "/") .. final_name + local logs_dir = save_step_logs_dir:gsub("/+$", "") + local final_path = logs_dir .. "/" .. final_name local cmd = string.format("cp '%s' '%s'", log_filename, final_path) assert(os.execute(cmd), "failed to copy step log to " .. final_path) + manifest_mod.write_fragment(logs_dir, test_name, { + kind = "machine", + name = final_name, + hash_function = hash_function or "keccak256", + requested_cycle_count = mcycle_count, + initial_root_hash = root_hash_before, + final_root_hash = root_hash_after, + }) + end +end + +local function run_machine_step_uarch(machine, reference_machine, ctx, max_mcycle) + local log_filename = os.tmpname() + local delete_temp = true + local deleter = {} + setmetatable(deleter, { + __gc = function() + if delete_temp then + os.remove(log_filename) + end + end, + }) + local test_cycles = machine:read_reg("mcycle") + local ref_cycles = reference_machine:read_reg("mcycle") + if test_cycles ~= ref_cycles then + fatal("%s: test_cycles ~= ref_cycles: %d ~= %d", ctx.ram_image, test_cycles, ref_cycles) + end + while math.ult(test_cycles, max_mcycle) do + local test_hash = machine:get_root_hash() + local ref_hash = reference_machine:get_root_hash() + if test_hash ~= ref_hash then + fatal( + "%s: Hash mismatch at mcycle %d: %s ~= %s", + ctx.ram_image, + test_cycles, + util.hexhash(test_hash), + util.hexhash(ref_hash) + ) + end + reference_machine:run(1 + ref_cycles) + -- Test machine advances one mcycle: log all its uarch cycles, then verify the round-trip. 
+ local root_hash_before = machine:get_root_hash() + os.remove(log_filename) + machine:log_step_uarch(math.maxinteger, log_filename) + local root_hash_after = machine:get_root_hash() + cartesi.machine:verify_step_uarch(root_hash_before, log_filename, math.maxinteger, root_hash_after) + machine:reset_uarch() + test_cycles = machine:read_reg("mcycle") + ref_cycles = reference_machine:read_reg("mcycle") + if test_cycles ~= ref_cycles then + fatal("%s: test_cycles ~= ref_cycles: %d ~= %d", ctx.ram_image, test_cycles, ref_cycles) + end + local test_iflags_H = machine:read_reg("iflags_H") ~= 0 + local ref_iflags_H = reference_machine:read_reg("iflags_H") ~= 0 + if test_iflags_H ~= ref_iflags_H then + fatal( + "%s: test_iflags_H ~= ref_iflags_H: %s ~= %s", + ctx.ram_image, + tostring(test_iflags_H), + tostring(ref_iflags_H) + ) + end + if test_iflags_H then + break + end + end + if machine:get_root_hash() ~= reference_machine:get_root_hash() then + fatal("%s: failed. Final hash does not match reference machine\n", ctx.ram_image) end + ctx.read_htif_tohost_data = machine:read_reg("htif_tohost_data") end local failures = nil @@ -1110,6 +1098,31 @@ elseif command == "run_step" then check_and_print_result(machine, row) post_fn(machine, pre_ctx) end) + -- Merge the per-test manifest fragments written by the workers (deduped, since + -- prepost variants can repeat a ram_image) into one _manifest.csv. 
+ if save_step_logs_dir and (not failures or failures == 0) then + local logs_dir = save_step_logs_dir:gsub("/+$", "") + local seen, keys = {}, {} + for _, row in ipairs(contexts) do + local test_name = step_log_name(row) + if not seen[test_name] then + seen[test_name] = true + keys[#keys + 1] = test_name + end + end + manifest_mod.concat_fragments(logs_dir, keys) + end +elseif command == "run_step_uarch" then + failures = parallel.run(contexts, jobs, function(row) + local pre_fn, post_fn = load_prepost(row.prepost) + local machine = build_machine(row.ram_image) + local reference_machine = build_machine(row.ram_image) + local pre_ctx = pre_fn(machine) + pre_fn(reference_machine) + run_machine_step_uarch(machine, reference_machine, row, row.expected_cycles) + check_and_print_result(machine, row) + post_fn(machine, pre_ctx) + end) elseif command == "run_uarch" then failures = parallel.run(contexts, jobs, function(row) local pre_fn, post_fn = load_prepost(row.prepost) @@ -1148,8 +1161,6 @@ elseif command == "run_host_and_uarch" then end) elseif command == "hash" then hash(selected_tests) -elseif command == "step" then - step(selected_tests) elseif command == "dump" then dump(selected_tests) elseif command == "list" then diff --git a/tests/lua/cartesi/tests/step_log_manifest.lua b/tests/lua/cartesi/tests/step_log_manifest.lua new file mode 100644 index 000000000..c0f8209bd --- /dev/null +++ b/tests/lua/cartesi/tests/step_log_manifest.lua @@ -0,0 +1,84 @@ +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: LGPL-3.0-or-later +-- +-- This program is free software: you can redistribute it and/or modify it under +-- the terms of the GNU Lesser General Public License as published by the Free +-- Software Foundation, either version 3 of the License, or (at your option) any +-- later version. 
+-- +-- This program is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +-- PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +-- +-- You should have received a copy of the GNU Lesser General Public License along +-- with this program (see COPYING). If not, see . +-- + +-- Shared step-log fixture manifest (CSV, one per fixture directory). +-- The expected root hashes are captured from the live recording machine, not +-- re-read from the log header, so a corrupted header is caught (verifier Layer 2). +-- +-- The cmio `data` column is the raw payload as plain ASCII. Safe only because +-- the recorder controls the payload and emits no CSV-breaking byte (comma, +-- newline, or quote). + +local M = {} + +M.MANIFEST_NAME = "_manifest.csv" +M.HEADER = "kind,name,expectError,hashFunction,requestedCycleCount,rootHashBefore,rootHashAfter,reason," + .. "dataLength,data,revertRootHash\n" + +local function hexhash(h) + return (h:gsub(".", function(c) + return string.format("%02x", string.byte(c)) + end)) +end + +-- Columns inapplicable to a kind stay blank. revert_root_hash is the value written to the +-- revert-root-hash shadow slot (cmio + reset rows). expect_error names the rejection a corrupt +-- fixture must trigger (the reject fixtures); blank means the log must replay successfully. +function M.format_row(ctx) + return string.format( + "%s,%s,%s,%s,%d,0x%s,0x%s,%s,%s,%s,%s\n", + ctx.kind, + ctx.name, + ctx.expect_error or "", + ctx.hash_function, + ctx.requested_cycle_count, + hexhash(ctx.initial_root_hash), + hexhash(ctx.final_root_hash), + ctx.reason and tostring(ctx.reason) or "", + ctx.data_length and tostring(ctx.data_length) or "", + ctx.data or "", + ctx.revert_root_hash and ("0x" .. 
hexhash(ctx.revert_root_hash)) or "" + ) +end + +function M.write_row(out, ctx) + out:write(M.format_row(ctx)) +end + +-- Parallel-safe: each worker writes its own fragment; the main process +-- concatenates fragments in caller order into the manifest. +function M.fragment_path(dir, key) + return dir .. "/" .. key .. "-manifest-fragment.csv" +end + +function M.write_fragment(dir, key, ctx) + local out = assert(io.open(M.fragment_path(dir, key), "w")) + M.write_row(out, ctx) +end + +-- Writes dir/_manifest.csv: header followed by each key's fragment (consuming it). +function M.concat_fragments(dir, keys) + local manifest = assert(io.open(dir .. "/" .. M.MANIFEST_NAME, "w")) + manifest:write(M.HEADER) + for _, key in ipairs(keys) do + local path = M.fragment_path(dir, key) + local frag = assert(io.open(path, "rb")) + manifest:write(frag:read("*a")) + os.remove(path) + end +end + +return M diff --git a/tests/lua/cartesi/tests/util.lua b/tests/lua/cartesi/tests/util.lua index b25133011..0a50d902f 100644 --- a/tests/lua/cartesi/tests/util.lua +++ b/tests/lua/cartesi/tests/util.lua @@ -393,4 +393,206 @@ function test_util.new_temp_file() return setmetatable(self, temp_file_meta) end +-- Binary step log helpers. Layout: see step_log_header in src/step-log.hpp. 
+ +function test_util.read_step_log_file(filename) + local file = assert(io.open(filename, "rb")) + local signature = file:read(8) + local root_hash_before = file:read(32) + local requested_cycle_count = string.unpack(" = assert(io.open(filename, "wb")) + local page_count = logdata.override_page_count or #logdata.pages + local node_count = logdata.override_node_count or #logdata.nodes + local sibling_count = logdata.override_sibling_count or #logdata.siblings + file:write(logdata.signature) + file:write(logdata.root_hash_before) + file:write(string.pack("> PAGE_LOG2_SIZE) < end_page_index + if not page_in and not node_in then + if not target and log2_page_count > 0 then + target = { + sibling_index = next_sibling, + address = first_page_index << PAGE_LOG2_SIZE, + log2_size = log2_page_count + PAGE_LOG2_SIZE, + } + end + next_sibling = next_sibling + 1 + return + end + if + node_in + and nodes[next_node].address == (first_page_index << PAGE_LOG2_SIZE) + and nodes[next_node].log2_size == (log2_page_count + PAGE_LOG2_SIZE) + then + next_node = next_node + 1 + return + end + if log2_page_count > 0 then + walk(first_page_index, log2_page_count - 1) + walk(first_page_index + (1 << (log2_page_count - 1)), log2_page_count - 1) + else + next_page = next_page + 1 + end + end + + walk(0, ROOT_LOG2_SIZE - PAGE_LOG2_SIZE) + assert(target, "no multi-page sibling subtree available to convert into a node") + local hash = log.siblings[target.sibling_index] + table.remove(log.siblings, target.sibling_index) + log.nodes = nodes + table.insert(log.nodes, { + address = target.address, + log2_size = target.log2_size, + hash_before = hash, + hash_after = hash, + }) + table.sort(log.nodes, function(a, b) + return a.address < b.address + end) + return log +end + +-- Recompute a parsed step log's root by folding pages, nodes, and siblings over the +-- full address space, mirroring the replayer's compute_root_hash. `use_after` selects a +-- node's hash_after (post-state) over hash_before. 
Lets a generator tamper a page or node +-- and re-derive the matching root so the log still decodes. Same three-cursor walk as +-- inject_unconsumed_node, but returning each subtree's hash instead of locating a target. +function test_util.recompute_step_log_root(log, use_after, hash_fn) + hash_fn = hash_fn or "keccak256" + local pages = {} + for _, p in ipairs(log.pages) do + pages[#pages + 1] = p + end + table.sort(pages, function(a, b) + return a.index < b.index + end) + local nodes = {} + for _, n in ipairs(log.nodes) do + nodes[#nodes + 1] = n + end + table.sort(nodes, function(a, b) + return a.address < b.address + end) + local next_page, next_node, next_sibling = 1, 1, 1 + + local function walk(first_page_index, log2_page_count) + local end_page_index = first_page_index + (1 << log2_page_count) + local page_in = next_page <= #pages and pages[next_page].index < end_page_index + local node_in = next_node <= #nodes and (nodes[next_node].address >> PAGE_LOG2_SIZE) < end_page_index + if not page_in and not node_in then + local sibling = log.siblings[next_sibling] + next_sibling = next_sibling + 1 + return sibling + end + if + node_in + and nodes[next_node].address == (first_page_index << PAGE_LOG2_SIZE) + and nodes[next_node].log2_size == (log2_page_count + PAGE_LOG2_SIZE) + then + local node = nodes[next_node] + next_node = next_node + 1 + return use_after and node.hash_after or node.hash_before + end + if log2_page_count > 0 then + local left = walk(first_page_index, log2_page_count - 1) + local right = walk(first_page_index + (1 << (log2_page_count - 1)), log2_page_count - 1) + return cartesi[hash_fn](left, right) + end + local page = pages[next_page] + next_page = next_page + 1 + return merkle_hash(page.data, 0, PAGE_LOG2_SIZE, hash_fn) + end + + return walk(0, ROOT_LOG2_SIZE - PAGE_LOG2_SIZE) +end + return test_util diff --git a/tests/lua/cmio-test.lua b/tests/lua/cmio-test.lua index c6effd475..ba689a1e6 100755 --- a/tests/lua/cmio-test.lua +++ 
b/tests/lua/cmio-test.lua @@ -125,17 +125,19 @@ local function load_machine(name) end local function next_input(machine, reason, data) - machine:send_cmio_response(reason, data) + -- No dispute checkpoint here; use the pre-input root as the revert target (the protocol's job + -- on-chain, but a sensible default when just driving the machine forward). + machine:send_cmio_response(machine:get_root_hash(), reason, data) end local function setup_advance(machine, data) assert(data) - local reason = cartesi.CMIO_YIELD_REASON_ADVANCE_STATE + local reason = cartesi.HTIF_YIELD_REASON_ADVANCE_STATE next_input(machine, reason, data) end local function setup_inspect(machine, data) - local reason = cartesi.CMIO_YIELD_REASON_INSPECT_STATE + local reason = cartesi.HTIF_YIELD_REASON_INSPECT_STATE next_input(machine, reason, data) end @@ -147,8 +149,8 @@ end local function check_output(machine, expected) assert(machine:read_reg("iflags_X") ~= 0) local cmd, reason, output = machine:receive_cmio_request() - assert(cmd == cartesi.CMIO_YIELD_COMMAND_AUTOMATIC) - assert(reason == cartesi.CMIO_YIELD_AUTOMATIC_REASON_TX_OUTPUT) + assert(cmd == cartesi.HTIF_YIELD_CMD_AUTOMATIC) + assert(reason == cartesi.HTIF_YIELD_AUTOMATIC_REASON_TX_OUTPUT) if expected ~= output then local e = assert(io.open("expected.bin", "wb")) local o = assert(io.open("output.bin", "wb")) @@ -163,16 +165,16 @@ end local function check_report(machine, expected) assert(machine:read_reg("iflags_X") ~= 0) local cmd, reason, output = machine:receive_cmio_request() - assert(cmd == cartesi.CMIO_YIELD_COMMAND_AUTOMATIC) - assert(reason == cartesi.CMIO_YIELD_AUTOMATIC_REASON_TX_REPORT) + assert(cmd == cartesi.HTIF_YIELD_CMD_AUTOMATIC) + assert(reason == cartesi.HTIF_YIELD_AUTOMATIC_REASON_TX_REPORT) assert(expected == output) end local function check_exception(machine, expected) assert(machine:read_reg("iflags_Y") ~= 0) local cmd, reason, output = machine:receive_cmio_request() - assert(cmd == 
cartesi.CMIO_YIELD_COMMAND_MANUAL) - assert(reason == cartesi.CMIO_YIELD_MANUAL_REASON_TX_EXCEPTION) + assert(cmd == cartesi.HTIF_YIELD_CMD_MANUAL) + assert(reason == cartesi.HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION) assert(expected == output, string.format("expected: %q, got: %q", expected, output)) end @@ -208,11 +210,11 @@ end local function check_finish(machine, output_hashes, expected_reason) local cmd, reason, output = machine:receive_cmio_request() assert(machine:read_reg("iflags_Y") ~= 0) - assert(cmd == cartesi.CMIO_YIELD_COMMAND_MANUAL) + assert(cmd == cartesi.HTIF_YIELD_CMD_MANUAL) assert(reason == expected_reason) -- only check for output-hashes-root-hash if the input was accepted - if expected_reason == cartesi.CMIO_YIELD_MANUAL_REASON_RX_ACCEPTED then + if expected_reason == cartesi.HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED then assert(#output == cartesi.HASH_SIZE) check_outputs_root_hash(output, output_hashes) else @@ -290,7 +292,7 @@ for _, dapp in pairs({ "ioctl", "http" }) do -- finish machine:run(MAX_MCYCLE) - check_finish(machine, hashes, cartesi.CMIO_YIELD_MANUAL_REASON_RX_ACCEPTED) + check_finish(machine, hashes, cartesi.HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED) end return 0 @@ -314,7 +316,7 @@ for _, dapp in pairs({ "ioctl", "http" }) do -- finish machine:run(MAX_MCYCLE) - check_finish(machine, hashes, cartesi.CMIO_YIELD_MANUAL_REASON_RX_REJECTED) + check_finish(machine, hashes, cartesi.HTIF_YIELD_MANUAL_REASON_RX_REJECTED) return 0 end, 0) diff --git a/tests/lua/create-step-logs.lua b/tests/lua/create-step-logs.lua deleted file mode 100755 index 6e78eafa3..000000000 --- a/tests/lua/create-step-logs.lua +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env lua5.4 - --- Copyright Cartesi and individual authors (see AUTHORS) --- SPDX-License-Identifier: Apache-2.0 --- --- Licensed under the Apache License, Version 2.0 (the "License"); --- you may not use this file except in compliance with the License. 
--- You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. --- - --- This script creates a set of step log files to be used as test fixtures. --- Files are named step-.log. Values (hashes, mcycle count) are --- in the step log header and can be extracted with risc0/step-log-util.lua. --- Files are stored in CARTESI_STEP_LOGS_PATH. - -local cartesi = require("cartesi") -local test_util = require("cartesi.tests.util") - -local function stderr_unsilenceable(fmt, ...) - io.stderr:write(string.format(fmt, ...)) -end - -local stderr = stderr_unsilenceable - -local function create_directory(path) - local success = io.open(path, "r") - if success == nil then - os.execute("mkdir -p " .. path) - stderr("Created directory:" .. path .. "\n") - else - success:close() - stderr("Directory already exists:" .. path .. "\n") - end -end - -local function create_default_config(images_dir, command) - return { - hash_tree = { - hash_function = "sha256", - }, - - ram = { - length = 0x4000000, - backing_store = { - data_filename = images_dir .. "linux.bin", - }, - }, - dtb = { - entrypoint = command, - }, - cmio = { - rx_buffer = { shared = false }, - tx_buffer = { shared = false }, - }, - flash_drive = { - { - backing_store = { - data_filename = images_dir .. "rootfs.ext2", - }, - }, - }, - } -end - -local function adjust_images_path(path) - return string.gsub(path or ".", "/*$", "") .. 
"/" -end - -local IMAGES_DIR = adjust_images_path(test_util.images_path) -local STEP_LOGS_PATH = adjust_images_path(test_util.step_logs_path) - -local function create_machine(command) - local config = create_default_config(IMAGES_DIR, command) - local machine = cartesi.machine(config) - return machine -end - -local function create_step_log(mcycle_count, command, start_mcycle) - local temp_filename = STEP_LOGS_PATH .. "temp.log" - local deleter = {} - local function remove_temp_file() - os.remove(temp_filename) - end - setmetatable(deleter, { - __gc = remove_temp_file, - }) - - start_mcycle = start_mcycle or 0 - local machine = create_machine(command) - machine:run(start_mcycle) - assert(machine:read_reg("mcycle") == start_mcycle) - machine:log_step(mcycle_count, temp_filename) - local final_filename = STEP_LOGS_PATH .. string.format("step-%d.log", start_mcycle) - assert( - os.execute(string.format("cp '%s' '%s'", temp_filename, final_filename)), - "failed to copy step log to " .. final_filename - ) - print("Created step log:" .. 
final_filename) - remove_temp_file() -end - -local command = 'lua -e "print(os.clock(), (10.5 * 2.3 + 5.7 / 3.1 - math.sqrt(42)) ^ 1.5)" | sha256sum' -local machine = create_machine(command) -machine:run() -local max_mcycle = machine:read_reg("mcycle") -local files_count = 10 -local mcycle_stride = max_mcycle // files_count -local max_step_count = 1000 -local mcycle_count = mcycle_stride > max_step_count and max_step_count or mcycle_stride - -create_directory(STEP_LOGS_PATH) - -create_step_log(1, command, 0) -- 1-mcycle for risc0 pipeline tests -for start_mcycle = mcycle_stride, max_mcycle, mcycle_stride do - create_step_log(mcycle_count, command, start_mcycle) -end diff --git a/tests/lua/htif-yield.lua b/tests/lua/htif-yield.lua index 6c62d3610..432ebdfd1 100755 --- a/tests/lua/htif-yield.lua +++ b/tests/lua/htif-yield.lua @@ -131,15 +131,15 @@ if uarch then config.uarch = uarch end -local YIELD_MANUAL = cartesi.CMIO_YIELD_COMMAND_MANUAL -local YIELD_AUTOMATIC = cartesi.CMIO_YIELD_COMMAND_AUTOMATIC +local YIELD_MANUAL = cartesi.HTIF_YIELD_CMD_MANUAL +local YIELD_AUTOMATIC = cartesi.HTIF_YIELD_CMD_AUTOMATIC -local REASON_PROGRESS = cartesi.CMIO_YIELD_AUTOMATIC_REASON_PROGRESS -local REASON_TX_OUTPUT = cartesi.CMIO_YIELD_AUTOMATIC_REASON_TX_OUTPUT -local REASON_TX_REPORT = cartesi.CMIO_YIELD_AUTOMATIC_REASON_TX_REPORT -local REASON_RX_ACCEPTED = cartesi.CMIO_YIELD_MANUAL_REASON_RX_ACCEPTED -local REASON_RX_REJECTED = cartesi.CMIO_YIELD_MANUAL_REASON_RX_REJECTED -local REASON_TX_EXCEPTION = cartesi.CMIO_YIELD_MANUAL_REASON_TX_EXCEPTION +local REASON_PROGRESS = cartesi.HTIF_YIELD_AUTOMATIC_REASON_PROGRESS +local REASON_TX_OUTPUT = cartesi.HTIF_YIELD_AUTOMATIC_REASON_TX_OUTPUT +local REASON_TX_REPORT = cartesi.HTIF_YIELD_AUTOMATIC_REASON_TX_REPORT +local REASON_RX_ACCEPTED = cartesi.HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED +local REASON_RX_REJECTED = cartesi.HTIF_YIELD_MANUAL_REASON_RX_REJECTED +local REASON_TX_EXCEPTION = cartesi.HTIF_YIELD_MANUAL_REASON_TX_EXCEPTION 
local yields = { { mcycle = 10, data = 10, cmd = YIELD_MANUAL, reason = REASON_PROGRESS }, diff --git a/tests/lua/log-with-mtime-transition.lua b/tests/lua/log-with-mtime-transition.lua index f8f2461d0..b4b18385a 100755 --- a/tests/lua/log-with-mtime-transition.lua +++ b/tests/lua/log-with-mtime-transition.lua @@ -17,9 +17,12 @@ local machine = cartesi.machine(config) io.stderr:write("getting root hash\n") local old_hash = machine:get_root_hash() io.stderr:write("getting uarch step log\n") -local access_log = machine:log_step_uarch() +local filename = os.tmpname() +os.remove(filename) +machine:log_step_uarch(1, filename) io.stderr:write("getting new root hash\n") local new_hash = machine:get_root_hash() io.stderr:write("verifying step log\n") -cartesi.machine:verify_step_uarch(old_hash, access_log, new_hash, {}) +cartesi.machine:verify_step_uarch(old_hash, filename, 1, new_hash) +os.remove(filename) print("ok") diff --git a/tests/lua/machine-bind.lua b/tests/lua/machine-bind.lua index 8fe227fa1..f515733c6 100755 --- a/tests/lua/machine-bind.lua +++ b/tests/lua/machine-bind.lua @@ -262,6 +262,28 @@ end local do_test = test_util.make_do_test(build_machine, machine_type) +-- Step-log filename helpers. os.tmpname() returns a name and creates the file; +-- log_* require the file to not exist, so we remove it first. + +local function tmpname_for_log() + local filename = os.tmpname() + os.remove(filename) + return filename +end + +local function step_uarch(machine) + local filename = tmpname_for_log() + machine:log_step_uarch(1, filename) + os.remove(filename) +end + +-- Binary step log helpers (definitions in cartesi.tests.util). +local read_step_log_file = test_util.read_step_log_file +local copy_step_log = test_util.copy_step_log + +-- Sentinel revert root hash send_cmio_response stores in its shadow slot; uarch reset reads it back. +local CMIO_REVERT_HASH = string.rep("\xab", cartesi.HASH_SIZE) + print("Testing machine bindings for type " .. 
machine_type) print("\n\nDifferent hash tree hash targets") @@ -648,7 +670,7 @@ print("\n\n perform step and check mcycle register") do_test("mcycle value should match", function(machine) local uarch_cycle_initial_value = machine:read_reg("uarch_cycle") - machine:log_step_uarch() + step_uarch(machine) -- Check mcycle increment local uarch_cycle_current_value = machine:read_reg("uarch_cycle") @@ -763,81 +785,100 @@ do_test("written and read values should match", function(machine) end end) -print("\n\n dump step log to console") -do_test("dumped step log content should match", function(machine) - local log = machine:log_step_uarch(cartesi.ACCESS_LOG_TYPE_ANNOTATIONS | cartesi.ACCESS_LOG_TYPE_LARGE_DATA) - local temp_file = test_util.new_temp_file() - util.dump_log(log, temp_file) - local log_output = temp_file:read_all() - -- luacheck: push no max line length - local expected_output = "begin step\n" - .. " 1: read uarch.cycle@0x400008(4194312): 0x0(0)\n" - .. " 2: read uarch.halt_flag@0x400000(4194304): 0x0(0)\n" - .. " 3: read uarch.pc@0x400010(4194320): 0x600000(6291456)\n" - .. " 4: read uarch.ram@0x600000(6291456): 0x10089307b00513(4513027209561363)\n" - .. " begin addi\n" - .. " 5: read uarch.x0@0x400018(4194328): 0x0(0)\n" - .. " 6: write uarch.x10@0x400068(4194408): 0x10050(65616) -> 0x7b(123)\n" - .. " 7: write uarch.pc@0x400010(4194320): 0x600000(6291456) -> 0x600004(6291460)\n" - .. " end addi\n" - .. " 8: write uarch.cycle@0x400008(4194312): 0x0(0) -> 0x1(1)\n" - .. "end step\n" - -- luacheck: pop - print("Log output:") - print("--------------------------") - print(log_output) - print("--------------------------") - assert(log_output == expected_output, "Output does not match expected output:\n" .. 
expected_output) -end) - print("\n\ntesting step and verification") do_test("machine step should pass verifications", function(machine) local initial_hash = machine:get_root_hash() - local log = machine:log_step_uarch(cartesi.ACCESS_LOG_TYPE_ANNOTATIONS) + local filename = tmpname_for_log() + machine:log_step_uarch(1, filename) local final_hash = machine:get_root_hash() - machine:verify_step_uarch(initial_hash, log, final_hash) + machine:verify_step_uarch(initial_hash, filename, 1, final_hash) + os.remove(filename) end) -print("\n\ntesting step and verification") -do_test("Step log must contain consistent data hashes", function(machine) - local wrong_hash = string.rep("\0", cartesi.HASH_SIZE) +do_test("pretty_print_step_uarch writes a readable printout", function(machine) + local log = tmpname_for_log() + -- Two micro cycles to exercise the multi-cycle replay (default program: "li a0,123", "li a7,halt"). + machine:log_step_uarch(2, log) + local text = cartesi.machine:pretty_print_step_uarch(log) + os.remove(log) + -- Match the whole printout line by line; addresses and values are wildcarded so the expectation + -- survives shadow-layout/cycle drift while order, numbering, names, and brackets stay pinned. + local expected = { + -- Lines 1-3 are the uarch.cycle counter read at three layers: the pretty-print driver, the + -- interpret loop, and uarch_step. Only uarch_step's read is intrinsic to stepping. 
+ "^1: read uarch%.cycle@0x%x+: 0x%x+$", + "^2: read uarch%.cycle@0x%x+: 0x%x+$", + "^3: read uarch%.cycle@0x%x+: 0x%x+$", + "^4: read uarch%.halt_flag@0x%x+: 0x%x+$", + "^5: read uarch%.pc@0x%x+: 0x%x+$", + "^6: read @0x%x+: 0x%x+$", + "^begin addi$", + "^ 7: read uarch%.x0@0x%x+: 0x%x+$", + "^ 8: write uarch%.x10@0x%x+: 0x%x+ %-> 0x%x+$", + "^ 9: write uarch%.pc@0x%x+: 0x%x+ %-> 0x%x+$", + "^end addi$", + "^10: write uarch%.cycle@0x%x+: 0x%x+ %-> 0x%x+$", + "^11: read uarch%.cycle@0x%x+: 0x%x+$", + "^12: read uarch%.halt_flag@0x%x+: 0x%x+$", + "^13: read uarch%.pc@0x%x+: 0x%x+$", + "^14: read @0x%x+: 0x%x+$", + "^begin addi$", + "^ 15: read uarch%.x0@0x%x+: 0x%x+$", + "^ 16: write uarch%.x17@0x%x+: 0x%x+ %-> 0x%x+$", + "^ 17: write uarch%.pc@0x%x+: 0x%x+ %-> 0x%x+$", + "^end addi$", + "^18: write uarch%.cycle@0x%x+: 0x%x+ %-> 0x%x+$", + } + local lines = {} + for line in (text .. "\n"):gmatch("(.-)\n") do + lines[#lines + 1] = line + end + if lines[#lines] == "" then -- drop the trailing empty split element + lines[#lines] = nil + end + assert(#lines == #expected, string.format("printout has %d lines, expected %d:\n%s", #lines, #expected, text)) + for i, pat in ipairs(expected) do + assert(lines[i]:match(pat), string.format("printout line %d %q does not match %q", i, lines[i], pat)) + end +end) + +-- Generic step-log format-corruption rejection is tested against the replay parser in +-- spec-verify-step-failure.lua. The cases below cover only what is per-function: +-- the Layer 2 argument checks and the function-specific replay checks +-- (UARCH_STATE pristine for reset, supra-page padded hash for cmio). 
+ +print("\n\ntesting verify_step_uarch unhappy paths") +do_test("verify_step_uarch rejects mismatched Layer 2 arguments", function(machine) + local bad_hash = string.rep("\0", cartesi.HASH_SIZE) local initial_hash = machine:get_root_hash() - local log = machine:log_step_uarch() + local filename = tmpname_for_log() + machine:log_step_uarch(1, filename) local final_hash = machine:get_root_hash() - machine:verify_step_uarch(initial_hash, log, final_hash) - local read_access = log.accesses[1] - assert(read_access.type == "read") - local read_hash = read_access.read_hash - -- ensure that verification fails with wrong read hash - read_access.read_hash = wrong_hash - local _, err = pcall(machine.verify_step_uarch, machine, initial_hash, log, final_hash) - check_error_find(err, "siblings and read hash do not match root hash before 1st access to uarch.cycle") - read_access.read_hash = read_hash -- restore correct value - - -- ensure that verification fails with wrong read hash - local write_access = log.accesses[#log.accesses] - assert(write_access.type == "write") - read_hash = write_access.read_hash - write_access.read_hash = wrong_hash - _, err = pcall(machine.verify_step_uarch, machine, initial_hash, log, final_hash) - check_error_find(err, "siblings and read hash do not match root hash before 8th access to uarch.cycle") - write_access.read_hash = read_hash -- restore correct value - - -- ensure that verification fails with wrong written hash - write_access.written_hash = wrong_hash - _, err = pcall(machine.verify_step_uarch, machine, initial_hash, log, final_hash) - check_error_find(err, "written hash for uarch.cycle does not match expected hash in 8th access") + -- sanity: happy path + machine:verify_step_uarch(initial_hash, filename, 1, final_hash) + -- bad root_hash_before arg + local _, err = pcall(machine.verify_step_uarch, machine, bad_hash, filename, 1, final_hash) + check_error_find(err, "root hash before mismatch") + -- bad uarch_cycle_count arg + _, err = 
pcall(machine.verify_step_uarch, machine, initial_hash, filename, 99, final_hash) + check_error_find(err, "uarch cycle count mismatch") + -- bad root_hash_after arg + _, err = pcall(machine.verify_step_uarch, machine, initial_hash, filename, 1, bad_hash) + check_error_find(err, "root hash after mismatch") + os.remove(filename) end) do_test("step when uarch cycle is max", function(machine) machine:write_reg("uarch_cycle", MAX_UARCH_CYCLE) assert(machine:read_reg("uarch_cycle") == MAX_UARCH_CYCLE) local initial_hash = machine:get_root_hash() - local log = machine:log_step_uarch(cartesi.ACCESS_LOG_TYPE_ANNOTATIONS) + local filename = tmpname_for_log() + machine:log_step_uarch(1, filename) assert(machine:read_reg("uarch_cycle") == MAX_UARCH_CYCLE) local final_hash = machine:get_root_hash() assert(final_hash == initial_hash) - machine:verify_step_uarch(initial_hash, log, final_hash) + machine:verify_step_uarch(initial_hash, filename, 1, final_hash) + os.remove(filename) end) local uarch_proof_step_program = { @@ -860,26 +901,26 @@ test_util.make_do_test(build_machine, machine_type, { local t2 = 7 local uarch_ram_start = cartesi.UARCH_RAM_START_ADDRESS - machine:log_step_uarch() -- auipc t0,0x0 - machine:log_step_uarch() -- addi t0,t0,256 # 0x100 + step_uarch(machine) -- auipc t0,0x0 + step_uarch(machine) -- addi t0,t0,256 # 0x100 assert(machine:read_reg("uarch_x" .. t0) == uarch_ram_start + 0x100) - machine:log_step_uarch() -- li t1,0xca + step_uarch(machine) -- li t1,0xca assert(machine:read_reg("uarch_x" .. t1) == 0xca) - machine:log_step_uarch() -- li t2,0xfe + step_uarch(machine) -- li t2,0xfe assert(machine:read_reg("uarch_x" .. 
t2) == 0xfe) -- sd and assert stored correctly - machine:log_step_uarch() -- sd t1,0(t0) [0xca] + step_uarch(machine) -- sd t1,0(t0) [0xca] assert(string.unpack("I8", machine:read_memory(uarch_ram_start + 0x100, 8)) == 0xca) -- sd and assert stored correctly - machine:log_step_uarch() -- t2,0(t0) [0xfe] + step_uarch(machine) -- t2,0(t0) [0xfe] assert(string.unpack("I8", machine:read_memory(uarch_ram_start + 0x100, 8)) == 0xfe) -- This step should run successfully -- The previous unproven step should have marked the updated pages dirty, allowing -- the tree to be updated correctly in the next proved step - machine:log_step_uarch() -- sd t1,0(t0) [0xca] + step_uarch(machine) -- sd t1,0(t0) [0xca] assert(string.unpack("I8", machine:read_memory(uarch_ram_start + 0x100, 8)) == 0xca) end) @@ -924,7 +965,7 @@ test_util.make_do_test(build_machine, machine_type, { uarch = {} })( local hash_after_immediate_reset = machine:get_root_hash() assert(initial_hash == hash_after_immediate_reset) -- hash should change after one step (shadow uarch change) - machine:log_step_uarch() + step_uarch(machine) local hash_after_step = machine:get_root_hash() assert(hash_after_step ~= initial_hash) -- reset should restore initial hash @@ -955,7 +996,7 @@ for i = 0, 31 do test_reset_uarch_config.processor.registers["x" .. 
i] = 0x10000 + (i * 8) end -local function test_reset_uarch(machine, with_log, with_annotations) +local function test_reset_uarch(machine, with_log) -- assert initial fixture state assert(machine:read_reg("uarch_halt_flag") ~= 0) assert(machine:read_reg("uarch_cycle") == 1) @@ -972,17 +1013,10 @@ local function test_reset_uarch(machine, with_log, with_annotations) assert(uarch_state_hash ~= cartesi.UARCH_PRISTINE_STATE_HASH) -- reset uarch state if with_log then - local log_type = (with_annotations and cartesi.ACCESS_LOG_TYPE_ANNOTATIONS or 0) - local log = machine:log_reset_uarch(log_type) - assert(#log.accesses == 1) - local access = log.accesses[1] - assert(access.sibling_hashes ~= nil) - assert(access.address == cartesi.UARCH_SHADOW_START_ADDRESS) - assert(access.log2_size == cartesi.UARCH_STATE_LOG2_SIZE) - assert(access.written_hash == cartesi.UARCH_PRISTINE_STATE_HASH) - assert(access.written == nil) - assert(access.read_hash ~= nil) - assert(access.read == nil) + -- Exercise the logging path (log structure/round-trip covered separately). + local filename = tmpname_for_log() + machine:log_reset_uarch(filename) + os.remove(filename) else machine:reset_uarch() end @@ -1004,44 +1038,149 @@ end test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( "Testing reset_uarch without logging", function(machine) - test_reset_uarch(machine, false, false) + test_reset_uarch(machine, false) end ) -for _, with_annotations in ipairs({ true, false }) do - test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( - "Testing reset_uarch with logging, annotations=" .. 
tostring(with_annotations), - function(machine) - test_reset_uarch(machine, true, with_annotations) +test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( + "Testing reset_uarch with logging", + function(machine) + test_reset_uarch(machine, true) + end +) + +test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( + "log_reset_uarch records the UARCH_STATE node with the pristine post-hash", + function(machine) + local filename = tmpname_for_log() + machine:log_reset_uarch(filename) + local log_data = read_step_log_file(filename) + assert(#log_data.nodes == 1, "expected exactly one node in a reset_uarch log") + local n = log_data.nodes[1] + assert( + n.address == cartesi.UARCH_STATE_START_ADDRESS, + string.format( + "node address 0x%x != UARCH_STATE_START_ADDRESS 0x%x", + n.address, + cartesi.UARCH_STATE_START_ADDRESS + ) + ) + assert( + n.log2_size == cartesi.UARCH_STATE_LOG2_SIZE, + string.format("node log2_size %d != UARCH_STATE_LOG2_SIZE %d", n.log2_size, cartesi.UARCH_STATE_LOG2_SIZE) + ) + assert( + n.hash_after == cartesi.UARCH_PRISTINE_STATE_HASH, + "node hash_after does not match cartesi.UARCH_PRISTINE_STATE_HASH" + ) + os.remove(filename) + end +) + +test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( + "log_reset_uarch witnesses the revert root hash shadow page into the step log", + function(machine) + -- The reset accesses the revert root hash (seeded here) and htif.tohost, forcing their shadow + -- page into the log so a consumer can read both straight off the reset proof. 
+ machine:write_memory(cartesi.AR_SHADOW_REVERT_ROOT_HASH_START, CMIO_REVERT_HASH) + local initial_hash = machine:get_root_hash() + local filename = tmpname_for_log() + machine:log_reset_uarch(filename) + local final_hash = machine:get_root_hash() + local log_data = read_step_log_file(filename) + local page_idx = cartesi.AR_SHADOW_REVERT_ROOT_HASH_START >> cartesi.HASH_TREE_LOG2_PAGE_SIZE + local offset = cartesi.AR_SHADOW_REVERT_ROOT_HASH_START & ((1 << cartesi.HASH_TREE_LOG2_PAGE_SIZE) - 1) + local found + for _, p in ipairs(log_data.pages) do + if p.index == page_idx then + found = p + end end - ) -end + assert(found, "reset log must record the shadow page holding the revert root hash") + assert( + found.data:sub(offset + 1, offset + cartesi.HASH_SIZE) == CMIO_REVERT_HASH, + "revert root hash not found at its shadow slot in the reset log" + ) + machine:verify_reset_uarch(initial_hash, filename, final_hash) + os.remove(filename) + end +) test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( "Testing verify_reset_uarch", function(machine) local initial_hash = machine:get_root_hash() - local log = machine:log_reset_uarch(cartesi.ACCESS_LOG_TYPE_ANNOTATIONS) + local filename = tmpname_for_log() + machine:log_reset_uarch(filename) local final_hash = machine:get_root_hash() -- verify happy path - machine:verify_reset_uarch(initial_hash, log, final_hash) + machine:verify_reset_uarch(initial_hash, filename, final_hash) -- verifying incorrect initial hash - local wrong_hash = string.rep("0", cartesi.HASH_SIZE) - local _, err = pcall(machine.verify_reset_uarch, machine, wrong_hash, log, final_hash) - check_error_find(err, "siblings and read hash do not match root hash before 1st access to uarch.state") + local wrong_hash = string.rep("\0", cartesi.HASH_SIZE) + local _, err = pcall(machine.verify_reset_uarch, machine, wrong_hash, filename, final_hash) + check_error_find(err, "root hash before mismatch") -- verifying incorrect final hash 
- _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, log, wrong_hash) - check_error_find(err, "mismatch in root hash after replay") + _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, filename, wrong_hash) + check_error_find(err, "root hash after mismatch") + os.remove(filename) end ) test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( - "Testing verify_reset_uarch", + "verify_reset_uarch rejects an unconsumed subtree-write node", function(machine) local initial_hash = machine:get_root_hash() - local log = machine:log_reset_uarch() + local filename = tmpname_for_log() + machine:log_reset_uarch(filename) local final_hash = machine:get_root_hash() - machine:verify_reset_uarch(initial_hash, log, final_hash) + -- A reset log carries exactly one node (the uarch state). A second node that no + -- write consumes must be rejected: its hash_after is folded into the post-state + -- root verbatim. + local corrupted = tmpname_for_log() + copy_step_log(filename, corrupted, test_util.inject_unconsumed_node) + local _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, corrupted, final_hash) + check_error_find(err, "unconsumed node in step log") + os.remove(filename) + os.remove(corrupted) + end +) + +test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( + "verify_reset_uarch rejects an unconsumed node on a reverted reset", + function(machine) + -- Pause the machine on an rx-rejected manual yield so the reset reverts: its post-state is the + -- recorded revert root hash, not the recomputed tree root. The revert path substitutes that root + -- instead of recomputing it, so the unconsumed-node check must hold there too. 
+ local revert_root_hash = string.rep("\171", 32) + local tohost_rx_rejected = (2 << 56) | (1 << 48) | (cartesi.HTIF_YIELD_MANUAL_REASON_RX_REJECTED << 32) + machine:write_reg("uarch_halt_flag", 1) + machine:write_reg("iflags_Y", 1) + machine:write_reg("htif_tohost", tohost_rx_rejected) + machine:write_memory(cartesi.AR_SHADOW_REVERT_ROOT_HASH_START, revert_root_hash) + local initial_hash = machine:get_root_hash() + local filename = tmpname_for_log() + machine:log_reset_uarch(filename) + local corrupted = tmpname_for_log() + copy_step_log(filename, corrupted, test_util.inject_unconsumed_node) + local _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, corrupted, revert_root_hash) + check_error_find(err, "unconsumed node in step log") + os.remove(filename) + os.remove(corrupted) + end +) + +test_util.make_do_test(build_machine, machine_type)( + "collect root hashes reject an out-of-range bundle count", + function(machine) + -- The bundle count narrows to int at the binding boundary; an oversized value must be + -- rejected, not silently wrapped to a small valid-looking size. 
+ local huge = 1 << 32 + local ok, err = pcall(machine.collect_mcycle_root_hashes, machine, 0, 1, 0, huge) + assert(not ok) + check_error_find(err, "out of range") + ok, err = pcall(machine.collect_uarch_cycle_root_hashes, machine, 0, huge) + assert(not ok) + check_error_find(err, "out of range") end ) @@ -1064,181 +1203,59 @@ test_util.make_do_test(build_machine, machine_type, { hash_tree = { hash_functio success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") ) -- log_reset_uarch should fail - success, err = pcall(machine.log_reset_uarch, machine) + success, err = pcall(machine.log_reset_uarch, machine, tmpname_for_log()) assert( success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") ) -- log_uarch step should fail - success, err = pcall(machine.log_step_uarch, machine) + success, err = pcall(machine.log_step_uarch, machine, 1, tmpname_for_log()) assert( success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") ) -- log_send_cmio_response should fail - success, err = pcall(machine.log_send_cmio_response, machine, 0, 0) + success, err = pcall(machine.log_send_cmio_response, machine, CMIO_REVERT_HASH, 0, "", tmpname_for_log()) assert( success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") ) end ) +print("\n\ntesting verify_reset_uarch unhappy paths") test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( - "Dump of log produced by log_reset_uarch should match", + "verify_reset_uarch rejects mismatched Layer 2 arguments and a non-pristine logged node", function(machine) - local log = machine:log_reset_uarch(cartesi.ACCESS_LOG_TYPE_ANNOTATIONS) - local expected_dump_pattern = "begin reset_uarch_state\n" - .. " 1: write uarch.state@0x400000%(4194304%): " - .. 
'hash:"[0-9a-f]+"%(2%^22 bytes%) %-> hash:"[0-9a-fA-F]+"%(2%^22 bytes%)\n' - .. "end reset_uarch_state\n" - - local tmpname = os.tmpname() - local deleter = {} - setmetatable(deleter, { - __gc = function() - os.remove(tmpname) - end, - }) - local tmp = assert(io.open(tmpname, "w+")) - util.dump_log(log, tmp) - tmp:seek("set", 0) - local actual_dump = tmp:read("*all") - - print("Output of reset_uarch log dump:") - print("--------------------------") - print(actual_dump) - print("--------------------------") - assert( - actual_dump:match(expected_dump_pattern), - "Dump of uarch_reset_state does not match expected pattern:\n" .. expected_dump_pattern - ) - end -) - -test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( - "Log uarch reset with large_data option set must have consistent read and written data", - function(machine) - -- reset uarch and get log + local bad_hash = string.rep("\0", cartesi.HASH_SIZE) local initial_hash = machine:get_root_hash() - local log = machine:log_reset_uarch(cartesi.ACCESS_LOG_TYPE_ANNOTATIONS | cartesi.ACCESS_LOG_TYPE_LARGE_DATA) + local filename1 = tmpname_for_log() + local filename2 = tmpname_for_log() + machine:log_reset_uarch(filename1) local final_hash = machine:get_root_hash() - assert(#log.accesses == 1, "log should have 1 access") - local access = log.accesses[1] - -- when large data is requested, the log must include read and written data - assert(access.read ~= nil, "read data should not be nil") - assert(access.written ~= nil, "written data should not be nil") - -- verify returned log - machine:verify_reset_uarch(initial_hash, log, final_hash) - -- save logged read and written data - local original_read = access.read - -- tamper with read data to produce a hash mismatch - access.read = "X" .. 
access.read:sub(2) - local _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, log, final_hash) - check_error_find(err, "read data for uarch.state does not match read hash in 1st access") - -- restore correct read - access.read = original_read - -- change written data to produce a hash mismatch - access.written = "X" .. access.written:sub(2) - _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, log, final_hash) - check_error_find(err, "written data for uarch.state does not match written hash in 1st access") + -- sanity: happy path + machine:verify_reset_uarch(initial_hash, filename1, final_hash) + -- bad root_hash_before arg + local _, err = pcall(machine.verify_reset_uarch, machine, bad_hash, filename1, final_hash) + check_error_find(err, "root hash before mismatch") + -- bad root_hash_after arg + _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, filename1, bad_hash) + check_error_find(err, "root hash after mismatch") + -- non-pristine hash_after must trip the reset_uarch pristine-state check + copy_step_log(filename1, filename2, function(log_data) + assert(#log_data.nodes == 1, "reset_uarch log should have exactly one node") + log_data.nodes[1].hash_after = bad_hash + end) + _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, filename2, final_hash) + check_error_find(err, "reset uarch node has wrong post-hash") + -- canonical form: reset_uarch logs must record requested_cycle_count = 0 + copy_step_log(filename1, filename2, function(log_data) + log_data.requested_cycle_count = 1 + end) + _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, filename2, final_hash) + check_error_find(err, "requested_cycle_count must be zero in reset_uarch log") + os.remove(filename1) + os.remove(filename2) end ) -do_test("Test unhappy paths of verify_reset_uarch", function(machine) - local bad_hash = string.rep("\0", cartesi.HASH_SIZE) - local function assert_error(expected_error, callback) - machine:reset_uarch() 
- local initial_hash = machine:get_root_hash() - local log = machine:log_reset_uarch() - local final_hash = machine:get_root_hash() - callback(log) - local _, err = pcall(machine.verify_reset_uarch, machine, initial_hash, log, final_hash) - check_error_find(err, expected_error) - end - assert_error("log is missing access 1st access to uarch.state", function(log) - log.accesses = {} - end) - assert_error("expected 1st access to write uarch.state at address 0x400000(4194304)", function(log) - log.accesses[1].address = 0 - end) - - assert_error("expected 1st access to uarch.state to write 2^22 bytes", function(log) - log.accesses[1].log2_size = 64 - end) - - assert_error('missing field "log/accesses/0/read_hash"', function(log) - log.accesses[#log.accesses].read_hash = nil - end) - assert_error("siblings and read hash do not match root hash before 1st access to uarch.state", function(log) - log.accesses[1].read_hash = bad_hash - end) - assert_error("access log was not fully consumed", function(log) - log.accesses[#log.accesses + 1] = log.accesses[1] - end) - assert_error("missing written hash of uarch.state in 1st access", function(log) - log.accesses[#log.accesses].written_hash = nil - end) - assert_error("access written data size is inconsistent with proof size", function(log) - log.accesses[#log.accesses].written = "\0" - end) - assert_error("written data for uarch.state does not match written hash in 1st access", function(log) - log.accesses[#log.accesses].written = string.rep("\0", 2 ^ 22) - end) - assert_error("siblings and read hash do not match root hash before 1st access to uarch.state", function(log) - log.accesses[1].sibling_hashes[1] = bad_hash - end) -end) - -do_test("Test unhappy paths of verify_step_uarch", function(machine) - local bad_hash = string.rep("\0", cartesi.HASH_SIZE) - local function assert_error(expected_error, callback) - machine:reset_uarch() - local initial_hash = machine:get_root_hash() - local log = machine:log_step_uarch() - local 
final_hash = machine:get_root_hash() - callback(log) - local _, err = pcall(machine.verify_step_uarch, machine, initial_hash, log, final_hash) - check_error_find(err, expected_error) - end - assert_error("log is missing access 1st access to uarch.cycle", function(log) - log.accesses = {} - end) - assert_error("expected 1st access to read uarch.cycle", function(log) - log.accesses[1].address = 0 - end) - assert_error("expected 1st access to uarch.cycle to read 2^3 bytes", function(log) - log.accesses[1].log2_size = 2 - end) - assert_error("expected 1st access to uarch.cycle to read 2^3 bytes", function(log) - log.accesses[1].log2_size = 65 - end) - assert_error("missing read data for uarch.cycle in 1st access", function(log) - log.accesses[1].read = nil - end) - assert_error("access read data size is inconsistent with proof size", function(log) - log.accesses[1].read = "\0" - end) - assert_error("siblings and read hash do not match root hash before 1st access to uarch.cycle", function(log) - log.accesses[1].read_hash = bad_hash - end) - assert_error("missing field", function(log) - log.accesses[#log.accesses].read_hash = nil - end) - assert_error("access log was not fully consumed", function(log) - log.accesses[#log.accesses + 1] = log.accesses[1] - end) - assert_error("missing written hash of uarch.cycle in 7th access", function(log) - log.accesses[#log.accesses].written_hash = nil - end) - assert_error("access written data size is inconsistent with proof size", function(log) - log.accesses[#log.accesses].written = "\0" - end) - assert_error("written data for uarch.cycle does not match written hash in 7th access", function(log) - log.accesses[#log.accesses].written = string.rep("\0", cartesi.HASH_SIZE) - end) - assert_error("siblings and read hash do not match root hash before 1st access to uarch.cycle", function(log) - log.accesses[1].sibling_hashes[1] = bad_hash - end) -end) - print("\n\n testing unsupported uarch instructions ") local uarch_illegal_insn_program 
= { @@ -1294,43 +1311,60 @@ do_test("send_cmio_response fails if iflags.Y is not set", function(machine) local data = string.rep("a", 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE) machine:write_reg("iflags_Y", 0) assert(machine:read_reg("iflags_Y") == 0) + -- the host send refuses upfront test_util.assert_error("iflags.Y is not set", function() - machine:send_cmio_response(reason, data) - end) - test_util.assert_error("iflags.Y is not set", function() - machine:log_send_cmio_response(reason, data) + machine:send_cmio_response(CMIO_REVERT_HASH, reason, data) end) + -- the logged operation cannot fail; it is a no-op that leaves the state unchanged + local hash_before = machine:get_root_hash() + machine:log_send_cmio_response(CMIO_REVERT_HASH, reason, data, tmpname_for_log()) + assert(machine:read_reg("iflags_Y") == 0) + assert(machine:get_root_hash() == hash_before) end) do_test("send_cmio_response fails if data is too big", function(machine) local reason = 1 local data_too_big = string.rep("a", 1 + (1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE)) machine:write_reg("iflags_Y", 1) + -- the host send refuses upfront test_util.assert_error("CMIO response data is too large", function() - machine:send_cmio_response(reason, data_too_big) - end) - test_util.assert_error("CMIO response data is too large", function() - machine:log_send_cmio_response(reason, data_too_big) + machine:send_cmio_response(CMIO_REVERT_HASH, reason, data_too_big) end) + -- the logged operation cannot fail; it is a no-op that leaves the state unchanged + local hash_before = machine:get_root_hash() + machine:log_send_cmio_response(CMIO_REVERT_HASH, reason, data_too_big, tmpname_for_log()) + assert(machine:read_reg("iflags_Y") == 1) + assert(machine:get_root_hash() == hash_before) end) --- asserts that an access has the expected key values -local function assert_access(accesses, index, expected_key_and_values) - assert(index <= #accesses) - for k, v in pairs(expected_key_and_values) do - local a = 
accesses[index] - assert(a[k] == v, "access." .. tostring(index) .. " should be " .. tostring(v) .. " but is " .. tostring(a[k])) - end -end +do_test("advance-state response to a rejected machine logs as a no-op", function(machine) + local advance_reason = cartesi.HTIF_YIELD_REASON_ADVANCE_STATE + local data = "0123456789" + -- the machine yielded manual but rejected the previous input + machine:write_reg("iflags_Y", 1) + machine:write_reg("htif_tohost_dev", cartesi.HTIF_DEV_YIELD) + machine:write_reg("htif_tohost_cmd", cartesi.HTIF_YIELD_CMD_MANUAL) + machine:write_reg("htif_tohost_reason", cartesi.HTIF_YIELD_MANUAL_REASON_RX_REJECTED) + -- the host send refuses upfront + local _, err = pcall(machine.send_cmio_response, machine, machine:get_root_hash(), advance_reason, data) + check_error_find(err, "machine is not waiting on an rx-accepted manual yield") + -- the logged operation cannot fail; it is a no-op that leaves the state unchanged. The machine + -- stays paused on a rejected yield, which must NOT trigger a revert substitution (send_cmio_response + -- is not a step), so the post-operation hash is the unchanged machine root hash. 
+ local hash_before = machine:get_root_hash() + local filename = tmpname_for_log() + machine:log_send_cmio_response(hash_before, advance_reason, data, filename) + assert(machine:read_reg("iflags_Y") == 1) + local hash_after = machine:get_root_hash() + assert(hash_after == hash_before) + machine:verify_send_cmio_response(hash_before, advance_reason, data, hash_before, filename, hash_after) +end) -local function test_send_cmio_input_with_different_arguments() +local function test_send_cmio_response_happy_path() local data = string.rep("a", 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE) local reason = 1 local max_rx_buffer_len = 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE - local hash_fn = "keccak256" - local data_hash = test_util.merkle_hash(data, 0, cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE, hash_fn) local all_zeros = string.rep("\0", max_rx_buffer_len) - local all_zeros_hash = test_util.merkle_hash(all_zeros, 0, cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE, hash_fn) -- prepares and asserts the state before send_cmio_response is called local function assert_before_cmio_response_sent(machine) machine:write_reg("iflags_Y", 1) @@ -1349,140 +1383,274 @@ local function test_send_cmio_input_with_different_arguments() end do_test("send_cmio_response happy path", function(machine) assert_before_cmio_response_sent(machine) - machine:send_cmio_response(reason, data) + machine:send_cmio_response(CMIO_REVERT_HASH, reason, data) assert_after_cmio_response_sent(machine) end) - for _, large_data in ipairs({ false, true }) do - local annotations = true - do_test( - string.format( - "log_send_cmio_response happy path with annotations=%s, large_data=%s", - annotations, - large_data - ), - function(machine) - local log_type = (annotations and cartesi.ACCESS_LOG_TYPE_ANNOTATIONS or 0) - | (large_data and cartesi.ACCESS_LOG_TYPE_LARGE_DATA or 0) - assert_before_cmio_response_sent(machine) - local root_hash_before = machine:get_root_hash() - local log = machine:log_send_cmio_response(reason, data, log_type) - 
assert_after_cmio_response_sent(machine) - local root_hash_after = machine:get_root_hash() - -- check log - local accesses = log.accesses - assert(#accesses == 4) - assert_access(accesses, 1, { - type = "read", - address = machine:get_reg_address("iflags_Y"), - log2_size = 3, - }) - assert_access(accesses, 2, { - type = "write", - address = cartesi.AR_CMIO_RX_BUFFER_START, - log2_size = cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE, - read_hash = all_zeros_hash, - read = large_data and all_zeros or nil, - written_hash = data_hash, - written = large_data and data or nil, - }) - assert_access(accesses, 3, { - type = "write", - address = machine:get_reg_address("htif_fromhost"), - log2_size = 3, - }) - assert_access(accesses, 4, { - type = "write", - address = machine:get_reg_address("iflags_Y"), - log2_size = 3, - }) - -- ask machine to verify state transitions - machine:verify_send_cmio_response(reason, data, root_hash_before, log, root_hash_after, log_type, {}) - end - ) - end end -test_send_cmio_input_with_different_arguments() - -do_test("Dump of log produced by send_cmio_response should match", function(machine) - machine:write_reg("iflags_Y", 1) - local data = "0123456789" - local reason = 7 - local log = machine:log_send_cmio_response(reason, data, cartesi.ACCESS_LOG_TYPE_ANNOTATIONS) - local expected_dump = [[ -begin send_cmio_response - 1: read iflags.Y@0x300(768): 0x1(1) - 2: write cmio rx buffer@0x60000000(1610612736): hash:"290decd9"(2^5 bytes) -> hash:"555b1f6d"(2^5 bytes) - 3: write htif.fromhost@0x330(816): 0x0(0) -> 0x70000000a(30064771082) - 4: write iflags.Y@0x300(768): 0x1(1) -> 0x0(0) -end send_cmio_response -]] - local temp_file = test_util.new_temp_file() - util.dump_log(log, temp_file) - local actual_dump = temp_file:read_all() - print("Output of log_send_cmio_response dump:") - print("--------------------------") - print(actual_dump) - print("--------------------------") - assert(actual_dump == expected_dump, "Dump of uarch_reset_state does not match 
expected:\n" .. expected_dump) -end) +test_send_cmio_response_happy_path() -do_test("send_cmio_response with different data sizes", function(machine) - local test_cases = { - { data_len = 1, write_len = 32 }, - { data_len = 32, write_len = 32 }, - { data_len = 33, write_len = 64 }, - { data_len = 64, write_len = 64 }, - { data_len = 1 << 20, write_len = 1 << 20 }, - { data_len = (1 << 20) + 1, write_len = 1 << 21 }, - { data_len = 1 << 21, write_len = 1 << 21 }, - } +-- log_send_cmio_response writes (data || zero pad) to the rx buffer and logs it as: +-- no entry (data_len 0), a page entry (write_len <= page size), or a node entry at +-- AR_CMIO_RX_BUFFER_START with hash_after = merkle_tree_hash(data || zero pad). +do_test("send_cmio_response across data-size boundaries: machine state + log content", function(machine) + local PAGE_LOG2 = cartesi.HASH_TREE_LOG2_PAGE_SIZE + local PAGE_SIZE = 1 << PAGE_LOG2 + local WORD_SIZE = 1 << cartesi.HASH_TREE_LOG2_WORD_SIZE local rx_buffer_size = 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE - local initial_rx_buffer = string.rep("x", rx_buffer_size) + local rx_start = cartesi.AR_CMIO_RX_BUFFER_START + local rx_page_idx = rx_start >> PAGE_LOG2 local reason = 1 - local function padded_data(data, len, padding) - return data .. string.rep(padding, len - #data) - end - for _, case in ipairs(test_cases) do - -- test logging and lo not logging - for _, logging in ipairs({ false, true }) do - print( - string.format( - " testing sending cmio response of %s bytes causing a write of %s bytes with logging=%s ", - case.data_len, - case.write_len, - logging - ) + local hash_fn = "keccak256" + local initial_rx_buffer = string.rep("x", rx_buffer_size) + -- Each row: data_len + the expected log2 of the rx-buffer write (nil = no write). 
+ local cases = { + { data_len = 0, write_log2 = nil }, + { data_len = 1, write_log2 = 5 }, + { data_len = WORD_SIZE, write_log2 = 5 }, + { data_len = WORD_SIZE + 1, write_log2 = 6 }, + { data_len = PAGE_SIZE - 1, write_log2 = 12 }, + { data_len = PAGE_SIZE, write_log2 = 12 }, + { data_len = PAGE_SIZE + 1, write_log2 = 13 }, + { data_len = (1 << 20), write_log2 = 20 }, + { data_len = (1 << 20) + 1, write_log2 = 21 }, + { data_len = rx_buffer_size, write_log2 = cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE }, + } + for _, c in ipairs(cases) do + print(string.format(" data_len=%d write_log2=%s", c.data_len, tostring(c.write_log2))) + machine:write_memory(rx_start, initial_rx_buffer) + local data = string.rep("a", c.data_len) + local filename = tmpname_for_log() + machine:write_reg("iflags_Y", 1) + machine:log_send_cmio_response(CMIO_REVERT_HASH, reason, data, filename) + + -- Machine state: rx buffer = data || zero pad up to write_len, then the original "x"s. + local write_len = c.write_log2 and (1 << c.write_log2) or 0 + local expected_rx = data + .. string.rep("\0", write_len - c.data_len) + .. string.rep("x", rx_buffer_size - write_len) + assert( + machine:read_memory(rx_start, rx_buffer_size) == expected_rx, + string.format("rx buffer mismatch for data_len=%d", c.data_len) + ) + + -- Log content: zero-length writes touch no rx page/node; sub-page writes + -- emit a page entry at the rx page index; supra-page writes emit a node + -- entry at rx_start whose hash_after is computable from (data || zeros). 
+ local log_data = read_step_log_file(filename) + if c.write_log2 == nil then + for _, p in ipairs(log_data.pages) do + assert(p.index ~= rx_page_idx, string.format("unexpected rx page touch for data_len=%d", c.data_len)) + end + for _, n in ipairs(log_data.nodes) do + assert(n.address ~= rx_start, string.format("unexpected rx node for data_len=%d", c.data_len)) + end + elseif c.write_log2 > PAGE_LOG2 then + local found + for _, n in ipairs(log_data.nodes) do + if n.address == rx_start then + found = n + end + end + assert(found, string.format("missing rx node for data_len=%d", c.data_len)) + assert( + found.log2_size == c.write_log2, + string.format("node log2_size=%d != %d for data_len=%d", found.log2_size, c.write_log2, c.data_len) + ) + local padded = data .. string.rep("\0", write_len - c.data_len) + local expected_hash = test_util.merkle_hash(padded, 0, c.write_log2, hash_fn) + assert( + found.hash_after == expected_hash, + string.format("node hash_after mismatch for data_len=%d", c.data_len) ) - machine:write_memory(cartesi.AR_CMIO_RX_BUFFER_START, initial_rx_buffer) - assert(machine:read_memory(cartesi.AR_CMIO_RX_BUFFER_START, rx_buffer_size) == initial_rx_buffer) - local data = string.rep("a", case.data_len) - machine:write_reg("iflags_Y", 1) - if logging then - local log = machine:log_send_cmio_response(reason, data) - assert(#log.accesses == 4, string.format("log should have 4 accesses, but it has %s", #log.accesses)) - assert(log.accesses[2].type == "write", "access 2 should be a write") - assert(1 << log.accesses[2].log2_size == case.write_len, "log2_size of write access does not match") - else - machine:send_cmio_response(reason, data) + else + local found + for _, p in ipairs(log_data.pages) do + if p.index == rx_page_idx then + found = p + end end - local expected_rx_buffer = padded_data(data, case.write_len, "\0") - .. 
string.rep("x", rx_buffer_size - case.write_len) - local new_rx_buffer = machine:read_memory(cartesi.AR_CMIO_RX_BUFFER_START, rx_buffer_size) + assert(found, string.format("missing rx page for data_len=%d", c.data_len)) assert( - new_rx_buffer == expected_rx_buffer, - string.format( - "rx_buffer\n'%s...'\n of length %s does not match\nexpected\n'%s...' of length %s", - string.sub(new_rx_buffer, 1, 80), - #new_rx_buffer, - string.sub(expected_rx_buffer, 1, 80), - #expected_rx_buffer - ) + found.data == string.rep("x", PAGE_SIZE), + string.format("rx page pre-state mismatch for data_len=%d", c.data_len) ) end + os.remove(filename) end end) +print("\n\ntesting verify_send_cmio_response unhappy paths") +do_test("verify_send_cmio_response rejects mismatched Layer 2 args and tampered data", function(machine) + -- supra-page data (>4KB) -> rx-buffer write logged as a node entry, exercising + -- the padded-hash mismatch check and node validation + local data = string.rep("a", 5000) + local reason = 1 + local bad_hash = string.rep("\0", cartesi.HASH_SIZE) + machine:write_reg("iflags_Y", 1) + local hash_before = machine:get_root_hash() + local filename = tmpname_for_log() + local corrupted = tmpname_for_log() + machine:log_send_cmio_response(CMIO_REVERT_HASH, reason, data, filename) + local hash_after = machine:get_root_hash() + -- sanity: happy path + machine:verify_send_cmio_response(CMIO_REVERT_HASH, reason, data, hash_before, filename, hash_after) + -- bad root_hash_before arg + local _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + data, + bad_hash, + filename, + hash_after + ) + check_error_find(err, "root hash before mismatch") + -- bad root_hash_after arg + _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + data, + hash_before, + filename, + bad_hash + ) + check_error_find(err, "root hash after mismatch") + -- tampered data: same length so the write_length_log2_size matches, 
but the + -- bytes differ, so the recomputed padded merkle hash will not match the + -- logged node's hash_after. + local bad_data = string.rep("b", #data) + _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + bad_data, + hash_before, + filename, + hash_after + ) + check_error_find(err, "write_memory_with_padding does not match logged hash") + -- node log2_size below page size: caught by replay parser + copy_step_log(filename, corrupted, function(log_data) + assert(#log_data.nodes >= 1, "cmio supra-page log should have at least one node") + log_data.nodes[1].log2_size = cartesi.HASH_TREE_LOG2_PAGE_SIZE + end) + _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + data, + hash_before, + corrupted, + hash_after + ) + check_error_find(err, "invalid log format: node log2 size out of range") + -- node address not aligned to its size: caught by replay parser + copy_step_log(filename, corrupted, function(log_data) + log_data.nodes[1].address = log_data.nodes[1].address + 1 + end) + _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + data, + hash_before, + corrupted, + hash_after + ) + check_error_find(err, "node address not aligned to its size") + -- duplicate the node so the combined pages+nodes stream has overlap + copy_step_log(filename, corrupted, function(log_data) + table.insert(log_data.nodes, { + address = log_data.nodes[1].address, + log2_size = log_data.nodes[1].log2_size, + hash_before = log_data.nodes[1].hash_before, + hash_after = log_data.nodes[1].hash_after, + }) + end) + _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + data, + hash_before, + corrupted, + hash_after + ) + check_error_find(err, "page or node overlaps a previous entry") + -- node log2_size == HASH_TREE_LOG2_ROOT_SIZE spans the whole address space, so the + -- parser requires address 0; the rx-buffer node's nonzero 
address trips "not aligned". + copy_step_log(filename, corrupted, function(log_data) + log_data.nodes[1].log2_size = cartesi.HASH_TREE_LOG2_ROOT_SIZE + end) + _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + data, + hash_before, + corrupted, + hash_after + ) + check_error_find(err, "node address not aligned to its size") + -- a second node that no write consumes: its hash_after is folded into the + -- post-state root verbatim, so the replayer must reject it + copy_step_log(filename, corrupted, test_util.inject_unconsumed_node) + _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + data, + hash_before, + corrupted, + hash_after + ) + check_error_find(err, "unconsumed node in step log") + -- canonical form: send_cmio_response logs must record requested_cycle_count = 0 + copy_step_log(filename, corrupted, function(log_data) + log_data.requested_cycle_count = 1 + end) + _, err = pcall( + machine.verify_send_cmio_response, + machine, + CMIO_REVERT_HASH, + reason, + data, + hash_before, + corrupted, + hash_after + ) + check_error_find(err, "requested_cycle_count must be zero in send_cmio_response log") + os.remove(filename) + os.remove(corrupted) +end) + +do_test("verify_send_cmio_response round-trips a single-byte sub-page write", function(machine) + -- Sub-page round-trip: a single byte zero-pads to fill one 32-byte leaf word and + -- is logged as a page entry the replayer reconstructs from data || zero pad. + -- Pre-fill the rx buffer with non-zero bytes so the zero padding is meaningful. 
+ local rx_buffer_size = 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE + machine:write_memory(cartesi.AR_CMIO_RX_BUFFER_START, string.rep("x", rx_buffer_size)) + local data = "a" + local reason = 1 + machine:write_reg("iflags_Y", 1) + local hash_before = machine:get_root_hash() + local filename = tmpname_for_log() + machine:log_send_cmio_response(CMIO_REVERT_HASH, reason, data, filename) + local hash_after = machine:get_root_hash() + machine:verify_send_cmio_response(CMIO_REVERT_HASH, reason, data, hash_before, filename, hash_after) + os.remove(filename) +end) + do_test("send_cmio_response of zero bytes", function(machine) local rx_buffer_size = 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE local initial_rx_buffer = string.rep("x", rx_buffer_size) @@ -1491,18 +1659,18 @@ do_test("send_cmio_response of zero bytes", function(machine) machine:write_reg("iflags_Y", 1) local reason = 1 local data = "" - machine:send_cmio_response(reason, data) + machine:send_cmio_response(CMIO_REVERT_HASH, reason, data) local new_rx_buffer = machine:read_memory(cartesi.AR_CMIO_RX_BUFFER_START, rx_buffer_size) assert(new_rx_buffer == initial_rx_buffer, "rx_buffer should not have been modified") assert(machine:read_reg("iflags_Y") == 0, "iflags.Y should be cleared") -- log and verify machine:write_reg("iflags_Y", 1) local hash_before = machine:get_root_hash() - local log = machine:log_send_cmio_response(reason, data) - util.dump_log(log, io.stderr) - assert(#log.accesses == 3, "log should have 3 accesses") + local filename = tmpname_for_log() + machine:log_send_cmio_response(CMIO_REVERT_HASH, reason, data, filename) local hash_after = machine:get_root_hash() - machine:verify_send_cmio_response(reason, data, hash_before, log, hash_after) + machine:verify_send_cmio_response(CMIO_REVERT_HASH, reason, data, hash_before, filename, hash_after) + os.remove(filename) end) local function test_cmio_buffers_backed_by_files() @@ -1571,373 +1739,52 @@ local function test_cmio_buffers_backed_by_files() end 
test_cmio_buffers_backed_by_files() -local uarch_store_double_in_t0_to_t1 = { - 0x00533023, -- sd t0,0(t1) -} -test_util.make_do_test(build_machine, machine_type, { - uarch = { - ram = { - backing_store = { data_filename = test_util.create_test_uarch_program(uarch_store_double_in_t0_to_t1) }, - }, - }, -})("Log of word access unaligned to hash tree leaf ", function(machine) - local leaf_size = 1 << cartesi.HASH_TREE_LOG2_WORD_SIZE - local word_size = 8 - local t0 = 5 -- x5 register - local t1 = t0 + 1 -- x6 register - local function make_leaf(w1, w2, w3, w4) - return string.rep(w1, word_size) - .. string.rep(w2, word_size) - .. string.rep(w3, word_size) - .. string.rep(w4, word_size) - end - -- write initial leaf data - local leaf_data = make_leaf("\x11", "\x22", "\x33", "\x44") - assert(#leaf_data == leaf_size) - local leaf_address = cartesi.UARCH_RAM_START_ADDRESS + (1 << cartesi.HASH_TREE_LOG2_WORD_SIZE) - machine:write_memory(leaf_address, leaf_data, leaf_size) - - -- step and log one instruction that stores the word in t0 to the address in t1 - -- returns raw and formatted log - local function log_step() - local log = machine:log_step_uarch(cartesi.ACCESS_LOG_TYPE_ANNOTATIONS) - local temp_file = test_util.new_temp_file() - util.dump_log(log, temp_file) - return log, temp_file:read_all() - end - - -- write to the first word - machine:write_reg("uarch_x" .. t1, leaf_address) - machine:write_reg("uarch_x" .. t0, 0xaaaaaaaaaaaaaaaa) - local log, dump = log_step() - assert( - dump:find( - "write uarch.ram@0x600020(6291488): 0x1111111111111111(1229782938247303441)" - .. 
" -> 0xaaaaaaaaaaaaaaaa(12297829382473034410)", - 1, - true - ) - ) - assert(log.accesses[7].read == leaf_data) - leaf_data = machine:read_memory(leaf_address, leaf_size) -- read and check written data - assert(leaf_data == make_leaf("\xaa", "\x22", "\x33", "\x44")) - assert(log.accesses[7].written == leaf_data) - - -- restart program and write to second leaf word - machine:write_reg("uarch_pc", cartesi.UARCH_RAM_START_ADDRESS) - machine:write_reg("uarch_x" .. t1, machine:read_reg("uarch_x" .. t1) + word_size) - machine:write_reg("uarch_x" .. t0, 0xbbbbbbbbbbbbbbbb) - log, dump = log_step() - assert( - dump:find( - "write uarch.ram@0x600028(6291496): 0x2222222222222222(2459565876494606882)" - .. " -> 0xbbbbbbbbbbbbbbbb(13527612320720337851)", - 1, - true - ) - ) - assert(log.accesses[7].read == leaf_data) - leaf_data = machine:read_memory(leaf_address, leaf_size) - assert(leaf_data == make_leaf("\xaa", "\xbb", "\x33", "\x44")) - assert(log.accesses[7].written == leaf_data) - - -- restart program and write to third leaf word - machine:write_reg("uarch_pc", cartesi.UARCH_RAM_START_ADDRESS) - machine:write_reg("uarch_x" .. t1, machine:read_reg("uarch_x" .. t1) + word_size) - machine:write_reg("uarch_x" .. t0, 0xcccccccccccccccc) - log, dump = log_step() - assert( - dump:find( - "7: write uarch.ram@0x600030(6291504): 0x3333333333333333(3689348814741910323)" - .. " -> 0xcccccccccccccccc(14757395258967641292)", - 1, - true - ) - ) - assert(log.accesses[7].read == leaf_data) - leaf_data = machine:read_memory(leaf_address, leaf_size) - assert(leaf_data == make_leaf("\xaa", "\xbb", "\xcc", "\x44")) - assert(log.accesses[7].written == leaf_data) - - -- restart program and write to fourth leaf word - machine:write_reg("uarch_pc", cartesi.UARCH_RAM_START_ADDRESS) - machine:write_reg("uarch_x" .. t1, machine:read_reg("uarch_x" .. t1) + word_size) - machine:write_reg("uarch_x" .. 
t0, 0xdddddddddddddddd) - log, dump = log_step() - assert( - dump:find( - "7: write uarch.ram@0x600038(6291512): 0x4444444444444444(4919131752989213764)" - .. " -> 0xdddddddddddddddd(15987178197214944733)", - 1, - true - ) - ) - assert(log.accesses[7].read == leaf_data) - leaf_data = machine:read_memory(leaf_address, leaf_size) - assert(leaf_data == make_leaf("\xaa", "\xbb", "\xcc", "\xdd")) - assert(log.accesses[7].written == leaf_data) -end) - --- helper function to load a step log file into a table -local function read_step_log_file(filename) - local file = assert(io.open(filename, "rb")) - -- read 72-byte header: root_hash_before[32] + mcycle_count[8] + root_hash_after[32] - local root_hash_before = file:read(32) - local mcycle_count = string.unpack(" = assert(io.open(filename, "wb")) - -- write 72-byte header: root_hash_before[32] + mcycle_count[8] + root_hash_after[32] - file:write(logdata.root_hash_before) - file:write(string.pack(". +-- + +-- Generates the big-machine (sha256) reject fixtures: recorded machine step logs tampered so the +-- shared replayer rejects them at decode. Replayed by the RISC0 guest and the C++/Lua host. Writes +-- /.log plus /_manifest.csv. + +local test_util = require("cartesi.tests.util") +local manifest = require("cartesi.tests.step_log_manifest") + +local function stderr(fmt, ...) + io.stderr:write(string.format(fmt, ...)) +end + +local function help() + stderr("Usage: %s --fixtures-dir= --output-dir=\n", arg[0]) + os.exit() +end + +local fixtures_dir, output_dir +for _, argument in ipairs(arg) do + local f = argument:match("^%-%-fixtures%-dir%=(.*)$") + local o = argument:match("^%-%-output%-dir%=(.*)$") + if f then + fixtures_dir = f + elseif o then + output_dir = o + elseif argument == "-h" or argument == "--help" then + help() + else + error("unrecognized option " .. argument) + end +end +assert(fixtures_dir, "--fixtures-dir is required") +assert(output_dir, "--output-dir is required") + +local BASE = fixtures_dir .. 
"/cartesi-machine-tests/step-rv64mi-p-access.log" + +local cases = { + { + tag = "bad_signature", + mutate = function(log) + log.signature = "\0" .. log.signature:sub(2) + end, + }, + { + tag = "unsupported_hash_function", + mutate = function(log) + log.hash_function = 99 + end, -- not keccak=0 / sha256=1 + }, + { + tag = "nonzero_scratch_hash", + mutate = function(log) + log.pages[1].scratch_hash = string.rep("\255", 32) + end, + }, + { + tag = "initial_root_mismatch", + -- Tamper a page byte without re-rooting: the header's pre-root no longer matches. + mutate = function(log) + local p = log.pages[#log.pages] + p.data = string.char(p.data:byte(1) ~ 0xff) .. p.data:sub(2) + end, + }, + { + tag = "page_count_zero", + -- A valid log always witnesses at least one page; an empty page set is malformed. + mutate = function(log) + log.pages = {} + end, + }, + { + tag = "page_count_exceeds_size", + -- Header claims far more pages than the body can hold. + mutate = function(log) + log.override_page_count = 0xffffffffff + end, + }, + { + tag = "sibling_count_mismatch", + -- Declared sibling count no longer matches the bytes left after pages and nodes. + mutate = function(log) + log.override_sibling_count = #log.siblings + 1 + end, + }, + { + tag = "page_index_not_increasing", + -- Pages must be strictly ascending by index; duplicate the first index onto the second. + mutate = function(log) + log.pages[2].index = log.pages[1].index + end, + }, + { + tag = "too_few_siblings", + -- Drop a sibling the tree walk needs; the fold runs out of sibling hashes. + mutate = function(log) + table.remove(log.siblings) + end, + }, +} + +assert(os.execute("rm -rf " .. output_dir)) +assert(os.execute("mkdir -p " .. output_dir)) +local out = assert(io.open(output_dir .. "/" .. manifest.MANIFEST_NAME, "w")) +out:write(manifest.HEADER) + +for _, case in ipairs(cases) do + local log = test_util.read_step_log_file(BASE) + case.mutate(log) + local name = case.tag .. 
".log" + test_util.write_step_log_file(log, output_dir .. "/" .. name) + manifest.write_row(out, { + kind = "machine", + name = name, + hash_function = "sha256", + requested_cycle_count = log.requested_cycle_count, + initial_root_hash = log.root_hash_before, + final_root_hash = log.root_hash_after, + expect_error = case.tag, + }) + stderr("adversarial machine: %-26s\n", case.tag) +end + +stderr("\nwrote %d adversarial machine step logs to %s\n", #cases, output_dir) diff --git a/tests/lua/record-adversarial-send-cmio-response.lua b/tests/lua/record-adversarial-send-cmio-response.lua new file mode 100755 index 000000000..bf631b9e2 --- /dev/null +++ b/tests/lua/record-adversarial-send-cmio-response.lua @@ -0,0 +1,176 @@ +#!/usr/bin/env lua5.4 +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: LGPL-3.0-or-later +-- +-- This program is free software: you can redistribute it and/or modify it under +-- the terms of the GNU Lesser General Public License as published by the Free +-- Software Foundation, either version 3 of the License, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +-- PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +-- +-- You should have received a copy of the GNU Lesser General Public License along +-- with this program (see COPYING). If not, see <https://www.gnu.org/licenses/>. +-- + +-- Generates the send_cmio_response reject fixtures (keccak256), verified by the Solidity verifier +-- and the C++/Lua host. Rejections about the data argument rather than the log bytes (an oversized +-- response) do not fit a fixture row and stay as per-language unit tests. + +local cartesi = require("cartesi") +local test_util = require("cartesi.tests.util") +local manifest = require("cartesi.tests.step_log_manifest") + +local function stderr(fmt, ...) 
+ io.stderr:write(string.format(fmt, ...)) +end + +local function help() + stderr("Usage: %s --fixtures-dir= --output-dir=\n", arg[0]) + os.exit() +end + +local fixtures_dir, output_dir +for _, argument in ipairs(arg) do + local f = argument:match("^%-%-fixtures%-dir%=(.*)$") + local o = argument:match("^%-%-output%-dir%=(.*)$") + if f then + fixtures_dir = f + elseif o then + output_dir = o + elseif argument == "-h" or argument == "--help" then + help() + else + error("unrecognized option " .. argument) + end +end +assert(fixtures_dir, "--fixtures-dir is required") +assert(output_dir, "--output-dir is required") + +-- A small sub-page response (no node) drives the belief checks; a supra-page response +-- (carries a node summarising the padded write) drives the node post-hash check. +local CMIO_SMALL = fixtures_dir .. "/send-cmio-response/send-cmio-response-1.log" +local CMIO_SUPRA = fixtures_dir .. "/send-cmio-response/send-cmio-response-4097.log" +local CMIO_REASON = 1 +local BOGUS = string.rep("\171", 32) +-- Must match the value record-send-cmio-response.lua stored in the base logs' revert-root-hash slot, +-- so the replay rewrites the same byte and the rejection fires on the injected fault, not a mismatch. +local REVERT_ROOT_HASH = string.rep("\xab", 32) + +-- Payload mirrors the positive recorder: dataLength copies of 'a'. +local function payload(n) + return ("a"):rep(n) +end + +local function flip_first_byte(s) + return string.char(s:byte(1) ~ 0xff) .. s:sub(2) +end + +-- iflags.Y must be set for a cmio response; clearing its shadow word (and re-rooting so the +-- log still decodes) makes the replay reject the precondition. +local IFLAGS_Y_ADDR = cartesi.machine:get_reg_address("iflags_Y") +local IFLAGS_PAGE = IFLAGS_Y_ADDR >> 12 +local IFLAGS_OFF = IFLAGS_Y_ADDR & 0xfff + +local function clear_iflags_y(log) + for _, page in ipairs(log.pages) do + if page.index == IFLAGS_PAGE then + page.data = page.data:sub(1, IFLAGS_OFF) .. string.rep("\0", 8) .. 
page.data:sub(IFLAGS_OFF + 9) + log.root_hash_before = test_util.recompute_step_log_root(log, false, "keccak256") + return + end + end + error("iflags page not found in cmio base log") +end + +-- Each case: tag, base file, payload length, and a mutate(log) that tampers the log in +-- place and returns the claimed {before, after} the manifest hands the verifier. +local cases = { + { + tag = "root_before_mismatch", + base = CMIO_SMALL, + data_length = 1, + mutate = function(_) + return { before = BOGUS } + end, + }, + { + tag = "root_after_mismatch", + base = CMIO_SMALL, + data_length = 1, + mutate = function(_) + return { after = BOGUS } + end, + }, + { + tag = "nonzero_cycle_count", + base = CMIO_SMALL, + data_length = 1, + mutate = function(log) + log.requested_cycle_count = 7 + end, -- cmio must be 0 + }, + { + tag = "final_root_mismatch", + base = CMIO_SMALL, + data_length = 1, + mutate = function(log) + log.root_hash_after = BOGUS + return { after = BOGUS } + end, + }, + { + tag = "cmio_node_hash_mismatch", + base = CMIO_SUPRA, + data_length = 4097, + -- hash_after feeds the post-state, not the pre-root, so decode accepts the mutated + -- log; the replay recomputes the padded write hash and rejects the wrong node hash. + mutate = function(log) + log.nodes[1].hash_after = flip_first_byte(log.nodes[1].hash_after) + end, + }, + { + -- Clearing iflags.Y in a recorded cmio log makes the replay a no-op (send_cmio_response cannot + -- fail), so the recomputed post-state stays at the tampered pre-state and never matches the + -- logged final hash. The forged "transition from a non-yielding machine" is rejected by the + -- final-root check. + tag = "cmio_iflags_cleared", + expect_error = "final_root_mismatch", + base = CMIO_SMALL, + data_length = 1, + mutate = function(log) + clear_iflags_y(log) + return { before = log.root_hash_before } + end, + }, +} + +assert(os.execute("rm -rf " .. output_dir)) +assert(os.execute("mkdir -p " .. 
output_dir)) +local out = assert(io.open(output_dir .. "/" .. manifest.MANIFEST_NAME, "w")) +out:write(manifest.HEADER) + +for _, case in ipairs(cases) do + local log = test_util.read_step_log_file(case.base) + local claim = case.mutate(log) or {} + local name = case.tag .. ".log" + test_util.write_step_log_file(log, output_dir .. "/" .. name) + manifest.write_row(out, { + kind = "send_cmio_response", + name = name, + hash_function = "keccak256", + requested_cycle_count = log.requested_cycle_count, + initial_root_hash = claim.before or log.root_hash_before, + final_root_hash = claim.after or log.root_hash_after, + reason = CMIO_REASON, + data_length = case.data_length, + data = payload(case.data_length), + revert_root_hash = REVERT_ROOT_HASH, + expect_error = case.expect_error or case.tag, + }) + stderr("adversarial cmio: %-26s\n", case.tag) +end + +stderr("\nwrote %d adversarial cmio step logs to %s\n", #cases, output_dir) diff --git a/tests/lua/record-adversarial-uarch.lua b/tests/lua/record-adversarial-uarch.lua new file mode 100755 index 000000000..5db6314f4 --- /dev/null +++ b/tests/lua/record-adversarial-uarch.lua @@ -0,0 +1,371 @@ +#!/usr/bin/env lua5.4 +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: LGPL-3.0-or-later +-- +-- This program is free software: you can redistribute it and/or modify it under +-- the terms of the GNU Lesser General Public License as published by the Free +-- Software Foundation, either version 3 of the License, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +-- PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +-- +-- You should have received a copy of the GNU Lesser General Public License along +-- with this program (see COPYING). If not, see <https://www.gnu.org/licenses/>. 
+-- + +-- Generates the UARCH (keccak256) reject fixtures: valid uarch base fixtures tampered (or +-- replayed with deliberately wrong claimed values) so the keccak replayers -- the Solidity +-- verifier and the C++/Lua host in keccak mode -- must reject identically. The big-machine +-- (sha256) reject fixtures for RISC0 are a separate generator. Pure log surgery over +-- already-recorded base fixtures; no machine is built here. The manifest's expectError +-- column carries a normalized tag each replayer maps to its own error (C++ message pattern / +-- Solidity selector / RISC0 reject). +-- +-- Tampering a page or the pre-state node re-roots root_before via the same fold the +-- replayer uses, so the log still decodes and the rejection fires at its intended layer. + +local cartesi = require("cartesi") +local test_util = require("cartesi.tests.util") +local manifest = require("cartesi.tests.step_log_manifest") + +local function stderr(fmt, ...) + io.stderr:write(string.format(fmt, ...)) +end + +local function help() + stderr("Usage: %s --fixtures-dir= --output-dir=\n", arg[0]) + os.exit() +end + +local fixtures_dir, output_dir +for _, argument in ipairs(arg) do + local f = argument:match("^%-%-fixtures%-dir%=(.*)$") + local o = argument:match("^%-%-output%-dir%=(.*)$") + if f then + fixtures_dir = f + elseif o then + output_dir = o + elseif argument == "-h" or argument == "--help" then + help() + else + error("unrecognized option " .. argument) + end +end +assert(fixtures_dir, "--fixtures-dir is required") +assert(output_dir, "--output-dir is required") + +local UARCH_BASE = fixtures_dir .. "/uarch-tests-per-cycle/rv64ui-uarch-add/00000.log" +local RESET_BASE = fixtures_dir .. "/reset-uarch/reset-uarch.log" +-- A reset whose machine is paused on a rejected input: the reset reverts, so the verifier substitutes +-- the recorded revert root instead of recomputing the post root. +local REVERT_RESET_BASE = fixtures_dir .. 
"/reset-uarch/reset-uarch-rejected.log" + +-- Cycle 0 fetches its first instruction at UARCH_PC_INIT (start of uarch RAM). +local PC_INIT = cartesi.UARCH_RAM_START_ADDRESS +local PC_PAGE = PC_INIT >> 12 +local PC_OFF = PC_INIT & 0xfff +local INSN_ILLEGAL = 0x00000000 +local INSN_EBREAK = 0x00100073 +local INSN_ECALL = 0x00000073 -- with pristine x17 (=0), an unknown ecall function + +-- A 32-byte value distinct from any real root, for belief/claim mismatches. +local BOGUS = string.rep("\171", 32) + +-- Overwrite the 32-bit instruction fetched at PC_INIT, then re-root so the log decodes. +local function inject_uarch_instruction(log, insn) + for _, page in ipairs(log.pages) do + if page.index == PC_PAGE then + local le = string.pack("> 12 +local REVERT_OFF = cartesi.AR_SHADOW_REVERT_ROOT_HASH_START & 0xfff +local HTIF_TOHOST_OFF +do + local m = assert(cartesi.machine({ ram = { length = 0x20000 }, uarch = { ram = { backing_store = {} } } })) + HTIF_TOHOST_OFF = m:get_reg_address("htif_tohost") & 0xfff +end + +-- Flip a byte at `off` within the page at `page_idx` WITHOUT re-rooting, so the header pre-root no +-- longer matches. Errors if the page is absent -- a guard that the reset still records this page. +local function tamper_page_byte(log, page_idx, off) + for _, page in ipairs(log.pages) do + if page.index == page_idx then + page.data = page.data:sub(1, off) .. string.char(page.data:byte(off + 1) ~ 0xff) .. page.data:sub(off + 2) + return + end + end + error(string.format("page 0x%x not recorded in reset log (revert-hash/htif shadow page missing?)", page_idx)) +end + +-- Each case: tag, kind, base file, and a mutate(log) that tampers the log in place and +-- returns the claimed {before, cycle, after} the manifest hands the verifier. Defaults +-- (real header values) apply when a field is nil. 
+local cases = { + -- Framing / header (rejected at decode; claimed values irrelevant) + { + tag = "bad_signature", + kind = "cycle", + base = UARCH_BASE, + mutate = function(log) + log.signature = "\0" .. log.signature:sub(2) + end, + }, + { + tag = "unsupported_hash_function", + kind = "cycle", + base = UARCH_BASE, + mutate = function(log) + log.hash_function = 99 + end, -- unsupported code (not keccak=0 / sha256=1) + }, + -- Page / node structure + { + tag = "page_count_zero", + kind = "cycle", + base = UARCH_BASE, + -- Empty the page array: decode rejects a zero page count before any root check. + mutate = function(log) + log.pages = {} + end, + }, + { + tag = "page_index_not_ascending", + kind = "cycle", + base = UARCH_BASE, + -- Duplicate the previous page index so the strictly-ascending check trips. + mutate = function(log) + log.pages[#log.pages].index = log.pages[#log.pages - 1].index + end, + }, + { + tag = "entries_overlap", + kind = "reset_uarch", + base = RESET_BASE, + -- Move the still-aligned uarch-state node onto shadow page 0 so the combined + -- pages+nodes disjointness walk sees overlapping entries. 
+ mutate = function(log) + log.nodes[1].address = 0 + end, + }, + { + tag = "node_misaligned", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(log) + log.nodes[1].address = log.nodes[1].address ~ 0x08 + end, + }, + { + tag = "node_log2_out_of_range", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(log) + log.nodes[1].log2_size = 65 + end, -- > root size (64) + }, + { + tag = "nonzero_scratch_hash", + kind = "cycle", + base = UARCH_BASE, + mutate = function(log) + log.pages[1].scratch_hash = string.rep("\255", 32) + end, + }, + { + tag = "unconsumed_node", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(log) + test_util.inject_unconsumed_node(log) + end, -- preserves pre-root + }, + { + -- Same injection on a reverted reset: the revert substitutes the recorded root instead of + -- recomputing it, so only the explicit unconsumed-node assertion on the revert path catches this. + tag = "unconsumed_node", + name = "reset_rejected_unconsumed_node", + kind = "reset_uarch", + base = REVERT_RESET_BASE, + mutate = function(log) + test_util.inject_unconsumed_node(log) + end, + }, + { + tag = "initial_root_mismatch", + kind = "cycle", + base = UARCH_BASE, + -- Tamper a page byte WITHOUT re-rooting: the header's pre-root no longer matches. + mutate = function(log) + local p = log.pages[#log.pages] + p.data = string.char(p.data:byte(1) ~ 0xff) .. p.data:sub(2) + end, + }, + -- The revert root hash and htif.tohost the reset accesses are bound into the proof: tampering + -- either, in the shadow page the reset records, trips the pre-root recompute. 
+ { + tag = "initial_root_mismatch", + name = "reset_revert_hash_tampered", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(log) + tamper_page_byte(log, SHADOW_PAGE, REVERT_OFF) + end, + }, + { + tag = "initial_root_mismatch", + name = "reset_htif_tohost_tampered", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(log) + tamper_page_byte(log, SHADOW_PAGE, HTIF_TOHOST_OFF) + end, + }, + { + tag = "nonzero_cycle_count", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(log) + log.requested_cycle_count = 7 + end, -- reset/cmio must be 0 + }, + -- Content traps (instruction injected at PC, log re-rooted) + { + tag = "illegal_instruction", + kind = "cycle", + base = UARCH_BASE, + mutate = function(log) + inject_uarch_instruction(log, INSN_ILLEGAL) + return { before = log.root_hash_before } + end, + }, + { + tag = "uarch_aborted", + kind = "cycle", + base = UARCH_BASE, + mutate = function(log) + inject_uarch_instruction(log, INSN_EBREAK) + return { before = log.root_hash_before } + end, + }, + { + tag = "unsupported_ecall", + kind = "cycle", + base = UARCH_BASE, + mutate = function(log) + inject_uarch_instruction(log, INSN_ECALL) + return { before = log.root_hash_before } + end, + }, + -- Belief / Layer-2 (valid log; the manifest lies about the transition) + { + tag = "root_before_mismatch", + kind = "cycle", + base = UARCH_BASE, + mutate = function(_) + return { before = BOGUS } + end, + }, + { + tag = "root_after_mismatch", + kind = "cycle", + base = UARCH_BASE, + mutate = function(_) + return { after = BOGUS } + end, + }, + { + tag = "cycle_count_mismatch", + kind = "cycle", + base = UARCH_BASE, + mutate = function(log) + return { cycle = log.requested_cycle_count + 1 } + end, + }, + { + tag = "final_root_mismatch", + kind = "cycle", + base = UARCH_BASE, + -- Header claims a post root the replay won't reproduce; claim the same bogus value + -- so the Layer-1 final-root check fires before the Layer-2 argument check. 
+ mutate = function(log) + log.root_hash_after = BOGUS + return { after = BOGUS } + end, + }, + -- Belief / Layer-2 on the reset entrypoint (verifyReset has its own copies of these checks) + { + tag = "root_before_mismatch", + name = "reset_root_before_mismatch", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(_) + return { before = BOGUS } + end, + }, + { + tag = "root_after_mismatch", + name = "reset_root_after_mismatch", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(_) + return { after = BOGUS } + end, + }, + { + tag = "final_root_mismatch", + name = "reset_final_root_mismatch", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(log) + log.root_hash_after = BOGUS + return { after = BOGUS } + end, + }, + -- Replay-semantic (decode accepts via hash_before; replay rejects the post-hash) + { + tag = "reset_node_wrong_posthash", + kind = "reset_uarch", + base = RESET_BASE, + mutate = function(log) + local n = log.nodes[1] + n.hash_after = string.char(n.hash_after:byte(1) ~ 0xff) .. n.hash_after:sub(2) + end, + }, +} + +assert(os.execute("rm -rf " .. output_dir)) +assert(os.execute("mkdir -p " .. output_dir)) +local out = assert(io.open(output_dir .. "/" .. manifest.MANIFEST_NAME, "w")) +out:write(manifest.HEADER) + +for _, case in ipairs(cases) do + local log = test_util.read_step_log_file(case.base) + local claim = case.mutate(log) or {} + -- Distinct filename when several cases share a tag (same rejection on different entrypoints). + local name = (case.name or case.tag) .. ".log" + test_util.write_step_log_file(log, output_dir .. "/" .. 
name) + manifest.write_row(out, { + kind = case.kind, + name = name, + hash_function = "keccak256", + requested_cycle_count = claim.cycle or log.requested_cycle_count, + initial_root_hash = claim.before or log.root_hash_before, + final_root_hash = claim.after or log.root_hash_after, + expect_error = case.tag, + }) + stderr("adversarial: %-26s (%s)\n", case.tag, case.kind) +end + +stderr("\nwrote %d adversarial step logs to %s\n", #cases, output_dir) diff --git a/tests/lua/record-one-mcycle.lua b/tests/lua/record-one-mcycle.lua new file mode 100755 index 000000000..88ffaff0b --- /dev/null +++ b/tests/lua/record-one-mcycle.lua @@ -0,0 +1,80 @@ +#!/usr/bin/env lua5.4 + +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: LGPL-3.0-or-later +-- +-- This program is free software: you can redistribute it and/or modify it under +-- the terms of the GNU Lesser General Public License as published by the Free +-- Software Foundation, either version 3 of the License, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +-- PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +-- +-- You should have received a copy of the GNU Lesser General Public License along +-- with this program (see COPYING). If not, see <https://www.gnu.org/licenses/>. +-- + +-- Standalone generator for the single 1-mcycle machine step log the risc0 +-- prove -> compress -> on-chain (Groth16) pipeline proves. A minimal zero-RAM machine +-- keeps it dependency-free (no images or test binaries): the first fetched instruction +-- traps, which is one well-defined mcycle. Defaults to sha256, the risc0 guest's hash. +-- +-- Writes <output-dir>/one-mcycle.log plus <output-dir>/_manifest.csv. + +local cartesi = require("cartesi") +local manifest_mod = require("cartesi.tests.step_log_manifest") + +local function stderr(fmt, ...)
+ io.stderr:write(string.format(fmt, ...)) +end + +local function help() + stderr("Usage: %s --output-dir= [--hash-function=sha256|keccak256]\n", arg[0]) + os.exit() +end + +local output_dir +local hash_function = "sha256" +for _, argument in ipairs(arg) do + local o = argument:match("^%-%-output%-dir%=(.*)$") + local h = argument:match("^%-%-hash%-function%=(.*)$") + if o then + output_dir = o + elseif h then + hash_function = h + elseif argument == "-h" or argument == "--help" then + help() + else + error("unrecognized option " .. argument) + end +end +assert(output_dir, "--output-dir is required") + +local machine = assert(cartesi.machine({ + hash_tree = { hash_function = hash_function }, + ram = { length = 0x100000 }, + uarch = { ram = { backing_store = {} } }, +})) + +assert(os.execute("mkdir -p " .. output_dir)) +local name = "one-mcycle.log" +local log_path = output_dir .. "/" .. name +os.remove(log_path) + +local ctx = { + kind = "machine", + name = name, + hash_function = hash_function, + requested_cycle_count = 1, +} +ctx.initial_root_hash = machine:get_root_hash() +machine:log_step(1, log_path) +ctx.final_root_hash = machine:get_root_hash() +cartesi.machine:verify_step(ctx.initial_root_hash, log_path, 1, ctx.final_root_hash) + +local manifest = assert(io.open(output_dir .. "/" .. 
manifest_mod.MANIFEST_NAME, "w")) +manifest:write(manifest_mod.HEADER) +manifest_mod.write_row(manifest, ctx) +stderr("\n1-mcycle step log written to %s\n", log_path) diff --git a/tests/lua/record-reset-uarch.lua b/tests/lua/record-reset-uarch.lua new file mode 100755 index 000000000..2cfab68fc --- /dev/null +++ b/tests/lua/record-reset-uarch.lua @@ -0,0 +1,160 @@ +#!/usr/bin/env lua5.4 + +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: LGPL-3.0-or-later +-- +-- This program is free software: you can redistribute it and/or modify it under +-- the terms of the GNU Lesser General Public License as published by the Free +-- Software Foundation, either version 3 of the License, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +-- PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +-- +-- You should have received a copy of the GNU Lesser General Public License along +-- with this program (see COPYING). If not, see <https://www.gnu.org/licenses/>. +-- + +-- Records the reset_uarch step-log fixtures (plain, rx-rejected, rx-accepted) + their _manifest.csv. +-- Fixtures use keccak256, the Solidity verifier's hash function. +-- +-- Writes <output-dir>/{reset-uarch{,-rejected,-accepted}.log, _manifest.csv}. + +local cartesi = require("cartesi") +local manifest_mod = require("cartesi.tests.step_log_manifest") + +local HASH_FUNCTION = "keccak256" + +-- Fixed sentinel revert root hash seeded in the revert-root-hash shadow slot. The reset accesses it, +-- forcing its shadow page into the step log; tests assert it round-trips into the reset proof. +local REVERT_ROOT_HASH = string.rep("\xab", 32) + +local function stderr(fmt, ...)
+ io.stderr:write(string.format(fmt, ...)) +end + +local function help() + stderr("Usage: %s --output-dir=\n", arg[0]) + os.exit() +end + +local output_dir +for _, argument in ipairs(arg) do + local o = argument:match("^%-%-output%-dir%=(.*)$") + if o then + output_dir = o + elseif argument == "-h" or argument == "--help" then + help() + else + error("unrecognized option " .. argument) + end +end +assert(output_dir, "--output-dir is required") + +local function build_machine() + return assert(cartesi.machine({ + ram = { length = 0x20000 }, + uarch = { ram = { backing_store = {} } }, + })) +end + +-- Records a uarch reset step log. The uarch state must be non-pristine to be meaningful; +-- setting uarch_halt_flag is the minimum perturbation. +local function create_reset_uarch_step_log() + local machine = build_machine() + local name = "reset-uarch.log" + local log_path = output_dir .. "/" .. name + os.remove(log_path) + machine:write_reg("uarch_halt_flag", 1) + -- Seed the revert-root-hash slot so the reset accesses it, forcing its shadow page into the log. + machine:write_memory(cartesi.AR_SHADOW_REVERT_ROOT_HASH_START, REVERT_ROOT_HASH) + local ctx = { + kind = "reset_uarch", + name = name, + hash_function = HASH_FUNCTION, + requested_cycle_count = 0, + revert_root_hash = REVERT_ROOT_HASH, + } + ctx.initial_root_hash = machine:get_root_hash() + machine:log_reset_uarch(log_path) + ctx.final_root_hash = machine:get_root_hash() + cartesi.machine:verify_reset_uarch(ctx.initial_root_hash, log_path, ctx.final_root_hash) + return ctx +end + +-- htif.tohost encoding a pending manual yield rejected by the dapp: dev=YIELD(2), cmd=MANUAL(1), +-- reason=rx-rejected. The reset detects this and reverts the canonical state to the revert root hash. 
+local HTIF_DEV_YIELD = 2 +local HTIF_YIELD_CMD_MANUAL = 1 +local TOHOST_RX_REJECTED = (HTIF_DEV_YIELD << 56) + | (HTIF_YIELD_CMD_MANUAL << 48) + | (cartesi.HTIF_YIELD_MANUAL_REASON_RX_REJECTED << 32) + +-- The same manual-yield encoding but rx-accepted: the reset reads tohost yet does NOT revert (only an +-- rx-rejected yield substitutes the revert root), so its canonical post-state is the post-reset root. +local TOHOST_RX_ACCEPTED = (HTIF_DEV_YIELD << 56) + | (HTIF_YIELD_CMD_MANUAL << 48) + | (cartesi.HTIF_YIELD_MANUAL_REASON_RX_ACCEPTED << 32) + +-- Records a uarch reset whose machine is paused on a rejected manual yield. The reset substitutes the +-- recorded revert root hash as its post-state, so the logged root_hash_after is REVERT_ROOT_HASH, not +-- the physical (pristine-uarch) root hash. +local function create_rejected_reset_uarch_step_log() + local machine = build_machine() + local name = "reset-uarch-rejected.log" + local log_path = output_dir .. "/" .. name + os.remove(log_path) + machine:write_reg("uarch_halt_flag", 1) + machine:write_reg("iflags_Y", 1) + machine:write_reg("htif_tohost", TOHOST_RX_REJECTED) + machine:write_memory(cartesi.AR_SHADOW_REVERT_ROOT_HASH_START, REVERT_ROOT_HASH) + local ctx = { + kind = "reset_uarch", + name = name, + hash_function = HASH_FUNCTION, + requested_cycle_count = 0, + revert_root_hash = REVERT_ROOT_HASH, + } + ctx.initial_root_hash = machine:get_root_hash() + machine:log_reset_uarch(log_path) + -- The reset reverted, so the canonical post-state is the revert root hash, not get_root_hash(). + ctx.final_root_hash = REVERT_ROOT_HASH + cartesi.machine:verify_reset_uarch(ctx.initial_root_hash, log_path, ctx.final_root_hash) + return ctx +end + +-- Records a uarch reset whose machine is paused on an rx-accepted manual yield. 
The reset reads +-- htif.tohost but does NOT revert (only rx-rejected substitutes the revert root), so its canonical +-- post-state is the recomputed post-reset root -- exercising the iflags.Y-set-but-not-rejected branch. +local function create_accepted_reset_uarch_step_log() + local machine = build_machine() + local name = "reset-uarch-accepted.log" + local log_path = output_dir .. "/" .. name + os.remove(log_path) + machine:write_reg("uarch_halt_flag", 1) + machine:write_reg("iflags_Y", 1) + machine:write_reg("htif_tohost", TOHOST_RX_ACCEPTED) + machine:write_memory(cartesi.AR_SHADOW_REVERT_ROOT_HASH_START, REVERT_ROOT_HASH) + local ctx = { + kind = "reset_uarch", + name = name, + hash_function = HASH_FUNCTION, + requested_cycle_count = 0, + revert_root_hash = REVERT_ROOT_HASH, + } + ctx.initial_root_hash = machine:get_root_hash() + machine:log_reset_uarch(log_path) + -- Not reverted: the post-state is the actual post-reset root, not the revert root hash. + ctx.final_root_hash = machine:get_root_hash() + cartesi.machine:verify_reset_uarch(ctx.initial_root_hash, log_path, ctx.final_root_hash) + return ctx +end + +assert(os.execute("mkdir -p " .. output_dir)) +local manifest = assert(io.open(output_dir .. "/" .. 
manifest_mod.MANIFEST_NAME, "w")) +manifest:write(manifest_mod.HEADER) +manifest_mod.write_row(manifest, create_reset_uarch_step_log()) +manifest_mod.write_row(manifest, create_rejected_reset_uarch_step_log()) +manifest_mod.write_row(manifest, create_accepted_reset_uarch_step_log()) +stderr("\nreset_uarch step logs written to %s\n", output_dir) diff --git a/tests/lua/record-send-cmio-response.lua b/tests/lua/record-send-cmio-response.lua new file mode 100755 index 000000000..ec4c9cd87 --- /dev/null +++ b/tests/lua/record-send-cmio-response.lua @@ -0,0 +1,159 @@ +#!/usr/bin/env lua5.4 + +-- Copyright Cartesi and individual authors (see AUTHORS) +-- SPDX-License-Identifier: LGPL-3.0-or-later +-- +-- This program is free software: you can redistribute it and/or modify it under +-- the terms of the GNU Lesser General Public License as published by the Free +-- Software Foundation, either version 3 of the License, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +-- PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +-- +-- You should have received a copy of the GNU Lesser General Public License along +-- with this program (see COPYING). If not, see <https://www.gnu.org/licenses/>. +-- + +-- Records the send_cmio_response step-log fixtures (5 payload sizes plus a no-op) + their _manifest.csv. +-- Fixtures use keccak256, the Solidity verifier's hash function. +-- +-- Writes <output-dir>/{send-cmio-response-<label>.log, _manifest.csv}; <label> is a payload size or "noop". + +local cartesi = require("cartesi") +local manifest_mod = require("cartesi.tests.step_log_manifest") + +local HASH_FUNCTION = "keccak256" + +-- Fixed sentinel revert root hash stored in the revert-root-hash shadow slot. Tests assert it +-- round-trips through the send_cmio_response post-state. +local REVERT_ROOT_HASH = string.rep("\xab", 32) + +local function stderr(fmt, ...)
+ io.stderr:write(string.format(fmt, ...)) +end + +local function help() + stderr("Usage: %s --output-dir=\n", arg[0]) + os.exit() +end + +local output_dir +for _, argument in ipairs(arg) do + local o = argument:match("^%-%-output%-dir%=(.*)$") + if o then + output_dir = o + elseif argument == "-h" or argument == "--help" then + help() + else + error("unrecognized option " .. argument) + end +end +assert(output_dir, "--output-dir is required") + +local function build_machine() + return assert(cartesi.machine({ + ram = { length = 0x20000 }, + uarch = { ram = { backing_store = {} } }, + })) +end + +-- Records one send_cmio_response step log of `data_length` bytes. The payload is a +-- repeated ASCII pattern; the Solidity verifier hashes it on-chain via +-- HashTree.merkleTreeHashPadded. +local function create_send_cmio_response_step_log(data_length, label) + local machine = build_machine() + local data = string.rep("a", data_length) + -- Seed the rx-buffer page non-zero so a replayer that skips zero-padding can't match the + -- post-state hash by coincidence. + machine:write_memory(cartesi.AR_CMIO_RX_BUFFER_START, string.rep("X", 4096)) + local name = "send-cmio-response-" .. label .. ".log" + local log_path = output_dir .. "/" .. 
name + os.remove(log_path) + machine:write_reg("iflags_Y", 1) + local reason = 1 + local ctx = { + kind = "send_cmio_response", + name = name, + hash_function = HASH_FUNCTION, + requested_cycle_count = 0, + reason = reason, + data = data, + data_length = #data, + revert_root_hash = REVERT_ROOT_HASH, + } + ctx.initial_root_hash = machine:get_root_hash() + machine:log_send_cmio_response(REVERT_ROOT_HASH, reason, data, log_path) + ctx.final_root_hash = machine:get_root_hash() + cartesi.machine:verify_send_cmio_response( + REVERT_ROOT_HASH, + reason, + data, + ctx.initial_root_hash, + log_path, + ctx.final_root_hash + ) + return ctx +end + +-- Records a no-op send_cmio_response: a response to a machine that yielded manual and rejected the +-- previous input leaves the state unchanged. Exercises the transpiled no-op path in the Solidity verifier. +local function create_send_cmio_response_noop_step_log() + local machine = build_machine() + local data = "This is a test cmio response" + local reason = cartesi.HTIF_YIELD_REASON_ADVANCE_STATE + machine:write_reg("iflags_Y", 1) + machine:write_reg("htif_tohost_dev", cartesi.HTIF_DEV_YIELD) + machine:write_reg("htif_tohost_cmd", cartesi.HTIF_YIELD_CMD_MANUAL) + machine:write_reg("htif_tohost_reason", cartesi.HTIF_YIELD_MANUAL_REASON_RX_REJECTED) + local name = "send-cmio-response-noop.log" + local log_path = output_dir .. "/" .. 
name + os.remove(log_path) + local ctx = { + kind = "send_cmio_response", + name = name, + hash_function = HASH_FUNCTION, + requested_cycle_count = 0, + reason = reason, + data = data, + data_length = #data, + revert_root_hash = REVERT_ROOT_HASH, + } + ctx.initial_root_hash = machine:get_root_hash() + machine:log_send_cmio_response(REVERT_ROOT_HASH, reason, data, log_path) + ctx.final_root_hash = machine:get_root_hash() + assert(ctx.final_root_hash == ctx.initial_root_hash, "no-op must leave the root hash unchanged") + cartesi.machine:verify_send_cmio_response( + REVERT_ROOT_HASH, + reason, + data, + ctx.initial_root_hash, + log_path, + ctx.final_root_hash + ) + return ctx +end + +-- Sizes spanning the boundaries the Solidity verifier dispatches on: +-- 0 -- skips the rx-buffer write entirely (iflags.Y still flips). +-- 1 -- sub-leaf payload, smallest non-empty write. +-- 4096 -- one page exactly; sub-page path top boundary. +-- 4097 -- mixed supra-page payload (page + 1 spilling into the next page). +-- 65536 -- aligned supra-page payload covering 16 pages. +local CMIO_FIXTURE_SIZES = { + { 0, "0" }, + { 1, "1" }, + { 4096, "4096" }, + { 4097, "4097" }, + { 65536, "65536" }, +} + +assert(os.execute("mkdir -p " .. output_dir)) +local manifest = assert(io.open(output_dir .. "/" .. 
manifest_mod.MANIFEST_NAME, "w")) +manifest:write(manifest_mod.HEADER) +for _, sz in ipairs(CMIO_FIXTURE_SIZES) do + manifest_mod.write_row(manifest, create_send_cmio_response_step_log(sz[1], sz[2])) +end +manifest_mod.write_row(manifest, create_send_cmio_response_noop_step_log()) +stderr("\nsend_cmio_response step logs (%d sizes + no-op) written to %s\n", #CMIO_FIXTURE_SIZES, output_dir) diff --git a/tests/lua/spec-collect-hashes.lua b/tests/lua/spec-collect-hashes.lua index 586faa36c..067835eab 100644 --- a/tests/lua/spec-collect-hashes.lua +++ b/tests/lua/spec-collect-hashes.lua @@ -19,6 +19,28 @@ local function expect_consistent_root_hash(machine) return root_hash end +local function is_rejected_manual_yield(machine) + return machine:read_reg("iflags_Y") ~= 0 + and machine:read_reg("htif_tohost_dev") == cartesi.HTIF_DEV_YIELD + and machine:read_reg("htif_tohost_cmd") == cartesi.HTIF_YIELD_CMD_MANUAL + and machine:read_reg("htif_tohost_reason") == cartesi.HTIF_YIELD_MANUAL_REASON_RX_REJECTED +end + +-- Root hash that verifiers accept after a state transition ending in the machine state, +-- which is the recorded revert root hash when the machine has rejected an input +local function canonical_root_hash(machine) + if is_rejected_manual_yield(machine) then + return machine:read_revert_root_hash() + end + return machine:get_root_hash() +end + +-- Tail accepted by machines whose revert leaf is pristine, required at entry but never consumed +local pristine_revert_uarch_tail = { + string.rep("\x00", cartesi.HASH_SIZE), + string.rep("\x00", cartesi.HASH_SIZE), +} + local function expect_mcycle_root_hashes(machine, mcycle_end, mcycle_period, mcycle_phase, log2_bundle_mcycle_count) -- this reference implementation does not support the following conditions assert(mcycle_end >= 0 and mcycle_end <= math.maxinteger) @@ -35,13 +57,13 @@ local function expect_mcycle_root_hashes(machine, mcycle_end, mcycle_period, mcy if machine:read_reg("mcycle") ~= mcycle_target then 
mcycle_phase = mcycle_period - (mcycle_target - machine:read_reg("mcycle")) if break_reason == cartesi.BREAK_REASON_HALTED or break_reason == cartesi.BREAK_REASON_YIELDED_MANUALLY then - table.insert(hashes, machine:get_root_hash()) + table.insert(hashes, canonical_root_hash(machine)) at_fixed_point = true end break end mcycle_phase = 0 - table.insert(hashes, machine:get_root_hash()) + table.insert(hashes, canonical_root_hash(machine)) if break_reason ~= cartesi.BREAK_REASON_REACHED_TARGET_MCYCLE then if break_reason == cartesi.BREAK_REASON_HALTED or break_reason == cartesi.BREAK_REASON_YIELDED_MANUALLY then at_fixed_point = true @@ -112,7 +134,8 @@ local function expect_next_mcycle_uarch_root_hashes( expect.equal(machine:read_reg("uarch_halt_flag"), 1) local halt_root_hash = expect_consistent_root_hash(machine) machine:reset_uarch() - local reset_root_hash = expect_consistent_root_hash(machine) + expect_consistent_root_hash(machine) + local reset_root_hash = canonical_root_hash(machine) expect.equal(machine:read_reg("uarch_cycle"), 0) expect.equal(machine:read_reg("mcycle"), mcycle) if log2_bundle_uarch_cycle_count and log2_bundle_uarch_cycle_count > 0 then @@ -133,7 +156,32 @@ local function expect_next_mcycle_uarch_root_hashes( table.insert(reset_indices, #hashes) end -local function expect_uarch_cycle_root_hashes(machine, mcycle_end, log2_bundle_uarch_cycle_count) +-- Appends the period of the reverted machine, as given by the revert uarch tail +local function expect_revert_uarch_tail_period(hashes, reset_indices, revert_uarch_tail, log2_bundle_uarch_cycle_count) + for i = 1, #revert_uarch_tail - 1 do + table.insert(hashes, revert_uarch_tail[i]) + end + local halt_root_hash = revert_uarch_tail[#revert_uarch_tail - 1] + local reset_root_hash = revert_uarch_tail[#revert_uarch_tail] + if log2_bundle_uarch_cycle_count and log2_bundle_uarch_cycle_count > 0 then + local bundle_uarch_cycle_count = 1 << log2_bundle_uarch_cycle_count + -- add halt root hash padding 
until finishing a bundle + while #hashes % bundle_uarch_cycle_count ~= 0 do + table.insert(hashes, halt_root_hash) + end + -- add repetitions of the halt root hash + for _ = 1, 2 * bundle_uarch_cycle_count - 1 do + table.insert(hashes, halt_root_hash) + end + table.insert(hashes, reset_root_hash) + assert(#hashes % bundle_uarch_cycle_count == 0) + else + table.insert(hashes, reset_root_hash) + end + table.insert(reset_indices, #hashes) +end + +local function expect_uarch_cycle_root_hashes(machine, mcycle_end, log2_bundle_uarch_cycle_count, revert_uarch_tail) -- this reference implementation does not support the following conditions assert(mcycle_end >= 0 and mcycle_end <= math.maxinteger, "unsupported call") assert(machine:read_reg("iflags_H") == 0, "unsupported call") @@ -151,7 +199,19 @@ local function expect_uarch_cycle_root_hashes(machine, mcycle_end, log2_bundle_u end if machine:read_reg("iflags_Y") ~= 0 then break_reason = cartesi.BREAK_REASON_YIELDED_MANUALLY - expect_next_mcycle_uarch_root_hashes(machine, mcycle, hashes, reset_indices, log2_bundle_uarch_cycle_count) + if is_rejected_manual_yield(machine) then + -- the canonical timeline continues from the reverted machine, + -- whose period is given by the revert uarch tail + expect_revert_uarch_tail_period(hashes, reset_indices, revert_uarch_tail, log2_bundle_uarch_cycle_count) + else + expect_next_mcycle_uarch_root_hashes( + machine, + mcycle, + hashes, + reset_indices, + log2_bundle_uarch_cycle_count + ) + end break end if machine:read_reg("iflags_X") ~= 0 then @@ -300,7 +360,7 @@ describe("collect hashes", function() mcycle_phase = mcycle_phase, } ) - expect.equal(machine:collect_uarch_cycle_root_hashes(mcycle_end), { + expect.equal(machine:collect_uarch_cycle_root_hashes(mcycle_end, 0, pristine_revert_uarch_tail), { hashes = {}, reset_indices = {}, break_reason = cartesi.BREAK_REASON_REACHED_TARGET_MCYCLE, @@ -324,7 +384,8 @@ describe("collect hashes", function() local machine_uarch = create_machine({ 
ram = { length = 4096 } }) machine_uarch:run(mcycle_start) - local collected_uarch = machine_uarch:collect_uarch_cycle_root_hashes(mcycle_end) + local collected_uarch = + machine_uarch:collect_uarch_cycle_root_hashes(mcycle_end, 0, pristine_revert_uarch_tail) expect.equal(machine_uarch:read_reg("mcycle"), mcycle_end) expect.equal(machine_uarch:get_root_hash(), machine:get_root_hash()) expect.equal(#collected_uarch.reset_indices, mcycle_end - mcycle_start) @@ -375,7 +436,8 @@ describe("collect hashes", function() expect.equal(machine:read_reg("mcycle"), mcycle_start) expect.equal(machine:get_root_hash(), expected_root_hash) - local collected_uarch = machine:collect_uarch_cycle_root_hashes(mcycle_end) + local collected_uarch = + machine:collect_uarch_cycle_root_hashes(mcycle_end, 0, pristine_revert_uarch_tail) expect.equal(machine:read_reg("mcycle"), mcycle_start) expect.equal(machine:get_root_hash(), expected_root_hash) expect.equal(collected_uarch.break_reason, cartesi.BREAK_REASON_HALTED) @@ -400,7 +462,8 @@ describe("collect hashes", function() expect.equal(machine:read_reg("mcycle"), mcycle_start) expect.equal(machine:get_root_hash(), expected_root_hash) - local collected_uarch = machine:collect_uarch_cycle_root_hashes(mcycle_end) + local collected_uarch = + machine:collect_uarch_cycle_root_hashes(mcycle_end, 0, pristine_revert_uarch_tail) expect.equal(machine:read_reg("mcycle"), mcycle_start) expect.equal(machine:get_root_hash(), expected_root_hash) expect.equal(collected_uarch.break_reason, cartesi.BREAK_REASON_YIELDED_MANUALLY) @@ -424,7 +487,8 @@ describe("collect hashes", function() expect.equal(machine:read_reg("mcycle"), mcycle_end) expect.equal(machine:get_root_hash(), expected_root_hash) - local collected_uarch = machine:collect_uarch_cycle_root_hashes(mcycle_end) + local collected_uarch = + machine:collect_uarch_cycle_root_hashes(mcycle_end, 0, pristine_revert_uarch_tail) expect.equal(machine:get_root_hash(), expected_root_hash) 
expect.equal(collected_uarch.break_reason, cartesi.BREAK_REASON_HALTED) expect.equal(#collected_uarch.reset_indices, 1) @@ -447,7 +511,8 @@ describe("collect hashes", function() expect.equal(machine:read_reg("mcycle"), mcycle_end) expect.equal(machine:get_root_hash(), expected_root_hash) - local collected_uarch = machine:collect_uarch_cycle_root_hashes(mcycle_end) + local collected_uarch = + machine:collect_uarch_cycle_root_hashes(mcycle_end, 0, pristine_revert_uarch_tail) expect.equal(machine:get_root_hash(), expected_root_hash) expect.equal(collected_uarch.break_reason, cartesi.BREAK_REASON_YIELDED_MANUALLY) expect.equal(#collected_uarch.reset_indices, 1) @@ -488,7 +553,8 @@ describe("collect hashes", function() it("should collect uarch cycles during mcycle overflows", function() local machine = create_machine({ ram = { length = 4096 } }) machine:write_reg("mcycle", cartesi.MCYCLE_MAX - 1) - local collected_uarch = machine:collect_uarch_cycle_root_hashes(cartesi.MCYCLE_MAX) + local collected_uarch = + machine:collect_uarch_cycle_root_hashes(cartesi.MCYCLE_MAX, 0, pristine_revert_uarch_tail) expect.equal(collected_uarch.break_reason, cartesi.BREAK_REASON_REACHED_TARGET_MCYCLE) local expected_root_hash = machine:get_root_hash() expect.equal(collected_uarch.hashes[collected_uarch.reset_indices[1]], expected_root_hash) @@ -496,7 +562,8 @@ describe("collect hashes", function() expect.equal(#collected_uarch.reset_indices, 2) expect.equal(machine:read_reg("mcycle"), cartesi.MCYCLE_MAX) - collected_uarch = machine:collect_uarch_cycle_root_hashes(cartesi.MCYCLE_MAX) + collected_uarch = + machine:collect_uarch_cycle_root_hashes(cartesi.MCYCLE_MAX, 0, pristine_revert_uarch_tail) expect.equal(machine:get_root_hash(), expected_root_hash) expect.equal(collected_uarch.break_reason, cartesi.BREAK_REASON_REACHED_TARGET_MCYCLE) expect.equal(#collected_uarch.reset_indices, 1) @@ -783,7 +850,11 @@ describe("collect hashes", function() local compare_machine = 
cartesi.machine(add_machine_config) machine:run(mcycle_start) compare_machine:run(mcycle_start) - local collected = machine:collect_uarch_cycle_root_hashes(mcycle_end, log2_uarch_cycle_mcycle_count) + local collected = machine:collect_uarch_cycle_root_hashes( + mcycle_end, + log2_uarch_cycle_mcycle_count, + pristine_revert_uarch_tail + ) local expected_collected = expect_uarch_cycle_root_hashes(compare_machine, mcycle_end, log2_uarch_cycle_mcycle_count) expect.equal(collected, expected_collected) @@ -959,7 +1030,8 @@ describe("collect hashes", function() expect_consistent_root_hash(machine) collect_machine:run(big_last_mcycle - mcycle_count) expect.equal(machine:get_root_hash(), collect_machine:get_root_hash()) - local collected = collect_machine:collect_uarch_cycle_root_hashes(big_last_mcycle) + local collected = + collect_machine:collect_uarch_cycle_root_hashes(big_last_mcycle, 0, pristine_revert_uarch_tail) local expected_collected = expect_uarch_cycle_root_hashes(machine, big_last_mcycle) local halt_exit_code = machine:read_reg("htif_tohost_data") >> 1 expect.equal(collected, expected_collected) @@ -995,15 +1067,20 @@ describe("collect hashes", function() local mcycles_to_phase0 = mcycle_period - ((machine:read_reg("mcycle") - mcycle_phase_offset) % mcycle_period) local mcycle_target = machine:read_reg("mcycle") + mcycles_to_phase0 - local collected = collect_machine:collect_uarch_cycle_root_hashes(mcycle_target) - local expected_collected = expect_uarch_cycle_root_hashes(machine, mcycle_target) + local collected = collect_machine:collect_uarch_cycle_root_hashes( + mcycle_target, + 0, + pristine_revert_uarch_tail + ) + local expected_collected = + expect_uarch_cycle_root_hashes(machine, mcycle_target, nil, pristine_revert_uarch_tail) expect.equal(collect_machine:read_reg("mcycle"), machine:read_reg("mcycle")) expect.equal(collected, expected_collected) expect.equal(collect_machine:get_root_hash(), machine:get_root_hash()) mcycles_to_phase0 = 
(machine:read_reg("mcycle") - mcycle_phase_offset) % mcycle_period local at_fixed_point = machine:read_reg("iflags_Y") ~= 0 or machine:read_reg("iflags_H") ~= 0 if mcycles_to_phase0 == 0 or at_fixed_point then - expect.equal(yield_sparse_hashes[sparse_hashes_count + 1], machine:get_root_hash()) + expect.equal(yield_sparse_hashes[sparse_hashes_count + 1], canonical_root_hash(machine)) sparse_hashes_count = sparse_hashes_count + 1 end if machine:read_reg("iflags_Y") == 1 then diff --git a/tests/lua/spec-verify-step-failure.lua b/tests/lua/spec-verify-step-failure.lua index 2d3813342..90573cca6 100644 --- a/tests/lua/spec-verify-step-failure.lua +++ b/tests/lua/spec-verify-step-failure.lua @@ -15,9 +15,18 @@ -- --[[ -Tests that verify_step correctly rejects corrupted binary step log files. -This exercises the validation error paths in replay-step-state-access.h -(the constructor that parses the binary log format). +Failure-mode tests for cartesi.machine:verify_step. + +Three flavors of failure are exercised: + + - Layer 2 argument checks (caller-belief mismatch with the log header). + - Layer 1 / parse-layer corruption (signature, counts, page/sibling + structure) in the replay_step_state_access constructor. + - Interpretation-time adversarial logs that are merkle-self-consistent + but still must be rejected during replay. + +Format-corruption coverage lives here, exercised against the +replay_step_state_access parser via verify_step. 
]] local cartesi = require("cartesi") @@ -26,85 +35,56 @@ local test_util = require("cartesi.tests.util") local describe, it, expect = lester.describe, lester.it, lester.expect -local HASH_SIZE = cartesi.HASH_SIZE -- 32 -local LOG2_PAGE_SIZE = 12 -local PAGE_SIZE = 1 << LOG2_PAGE_SIZE -- 4096 -local LOG2_ROOT_SIZE = 64 -local PAGE_ENTRY_SIZE = 8 + PAGE_SIZE + HASH_SIZE -- page_index + data + scratch_hash - --- Field offsets in the binary step log -local OFFSET_ROOT_HASH_BEFORE = 0 -local OFFSET_MCYCLE_COUNT = OFFSET_ROOT_HASH_BEFORE + HASH_SIZE -- 32 -local OFFSET_ROOT_HASH_AFTER = OFFSET_MCYCLE_COUNT + 8 -- 40 -local OFFSET_HASH_FUNCTION = OFFSET_ROOT_HASH_AFTER + HASH_SIZE -- 72 -local OFFSET_PAGE_COUNT = OFFSET_HASH_FUNCTION + 8 -- 80 -local OFFSET_FIRST_PAGE = OFFSET_PAGE_COUNT + 8 -- 88 - --- Produce a valid step log and return (root_hash_before, mcycle_count, root_hash_after, log_data) -local function produce_valid_step_log() - local machine = cartesi.machine({ - ram = { length = 0x20000 }, +local HASH_SIZE = cartesi.HASH_SIZE +local LOG2_PAGE_SIZE = cartesi.HASH_TREE_LOG2_PAGE_SIZE +local PAGE_SIZE = 1 << LOG2_PAGE_SIZE +local LOG2_ROOT_SIZE = cartesi.HASH_TREE_LOG2_ROOT_SIZE +local BAD_HASH = string.rep("\0", HASH_SIZE) +local MCYCLE_COUNT = 10 + +-- Machine configured so log_step touches multiple pages (shadow_state, an +-- instruction page, and the PMA page). +local function build_machine(hash_fn) + return cartesi.machine({ + hash_tree = hash_fn and { hash_function = hash_fn } or nil, + ram = { length = 1 << 20 }, + uarch = { + ram = { + length = 0x1000, + backing_store = { + data_filename = test_util.create_test_uarch_program(test_util.uarch_programs.default), + }, + }, + }, }, {}) +end + +-- Produce a fresh valid log + endpoint hashes; each call gives an isolated file. 
+local function produce_valid_log(hash_fn) + local machine = build_machine(hash_fn) + machine:write_reg("mcycle", 0) local root_hash_before = machine:get_root_hash() - local log_filename = os.tmpname() - os.remove(log_filename) -- log_step requires the file not to exist - local mcycle_count = 1 - machine:log_step(mcycle_count, log_filename) + local filename = os.tmpname() + os.remove(filename) + machine:log_step(MCYCLE_COUNT, filename) local root_hash_after = machine:get_root_hash() - -- Read the binary log file - local f = assert(io.open(log_filename, "rb")) - local log_data = f:read("a") - os.remove(log_filename) - return root_hash_before, mcycle_count, root_hash_after, log_data + return filename, root_hash_before, root_hash_after end --- Write log_data to a temp file, call verify_step, clean up -local function verify_step_with_data(root_hash_before, mcycle_count, root_hash_after, log_data) - local log_filename = os.tmpname() - local f = assert(io.open(log_filename, "wb")) - f:write(log_data) - f:close() +-- Run verify_step against a corrupted copy produced by `mutate`. Returns (ok, err). 
+local function verify_corrupted(filename, mutate, root_hash_before, mcycle_count, root_hash_after) + local corrupted = os.tmpname() + test_util.copy_step_log(filename, corrupted, mutate) local ok, err = pcall(function() - cartesi.machine:verify_step(root_hash_before, log_filename, mcycle_count, root_hash_after) + cartesi.machine:verify_step(root_hash_before, corrupted, mcycle_count, root_hash_after) end) - os.remove(log_filename) + os.remove(corrupted) return ok, err end --- Helper: produce a valid log, corrupt it, verify it fails with expected error -local function should_fail(expected_error, corrupt) - local root_hash_before, mcycle_count, root_hash_after, log_data = produce_valid_step_log() - local corrupted = corrupt(log_data, root_hash_before, mcycle_count, root_hash_after) - if type(corrupted) == "table" then - -- corrupt() returned {log_data, root_hash_before, mcycle_count, root_hash_after} - log_data = corrupted[1] - root_hash_before = corrupted[2] - mcycle_count = corrupted[3] - root_hash_after = corrupted[4] - else - log_data = corrupted - end - local ok, err = verify_step_with_data(root_hash_before, mcycle_count, root_hash_after, log_data) - expect.falsy(ok) - expect.truthy(err and err:find(expected_error, 1, true), err) -end - --- Helper: verify the happy path works -local function should_succeed() - local root_hash_before, mcycle_count, root_hash_after, log_data = produce_valid_step_log() - local ok, err = verify_step_with_data(root_hash_before, mcycle_count, root_hash_after, log_data) - expect.truthy(ok, err) -end - --- Replace bytes at a given offset in a string -local function replace_at(data, offset, replacement) - return data:sub(1, offset) .. replacement .. data:sub(offset + #replacement + 1) -end - --- Compute sibling hashes for a given subset of page indices using the machine's --- hash tree. 
This implements the same tree walk as compute_root_hash_impl in --- replay-step-state-access.h, but uses get_node_hash for subtrees that contain --- no selected pages instead of consuming from a sibling array. +-- Three-cursor sibling computation: replicates replay_step_state_access's tree +-- walk for an arbitrary subset of present page indices, using a pristine +-- machine's node hashes for subtrees that contain none of those pages. local function get_siblings_for_pages(machine, page_indices) table.sort(page_indices) local log2_page_count = LOG2_ROOT_SIZE - LOG2_PAGE_SIZE @@ -113,12 +93,10 @@ local function get_siblings_for_pages(machine, page_indices) local function walk(first_page_index, lpc) local range_size = 1 << lpc - -- Check if any selected pages fall in this range local has_page = next_page <= #page_indices and page_indices[next_page] >= first_page_index and page_indices[next_page] < first_page_index + range_size if not has_page then - -- No pages in range: get the subtree hash from the machine local addr = first_page_index << LOG2_PAGE_SIZE local log2_size = lpc + LOG2_PAGE_SIZE table.insert(siblings, machine:get_node_hash(addr, log2_size)) @@ -128,7 +106,6 @@ local function get_siblings_for_pages(machine, page_indices) walk(first_page_index, lpc - 1) walk(first_page_index + (range_size >> 1), lpc - 1) else - -- Leaf: this page is present, skip it next_page = next_page + 1 end end @@ -137,401 +114,365 @@ local function get_siblings_for_pages(machine, page_indices) return siblings end --- Build a binary step log from components -local function build_step_log(root_hash_before, mcycle_count, root_hash_after, hash_function, pages, siblings) - local parts = { - root_hash_before, - string.pack(" = assert(io.open(truncated, "wb")) + f:write(string.rep("\0", 16)) + end + local ok, err = pcall(function() + cartesi.machine:verify_step(root_hash_before, truncated, MCYCLE_COUNT, root_hash_after) + end) + os.remove(truncated) + expect.falsy(ok) + expect.truthy(err 
and err:find("step log shorter than header", 1, true), err) end) end) - it("should reject log truncated before hash_function", function() - should_fail("hash function type past end of step log", function(log_data) - return log_data:sub(1, OFFSET_HASH_FUNCTION) + it("rejects an invalid signature", function() + expect_corruption_error("invalid step log signature", function(log_data) + log_data.signature = string.rep("\0", 8) end) end) - it("should reject log truncated before page_count", function() - should_fail("page count past end of step log", function(log_data) - return log_data:sub(1, OFFSET_PAGE_COUNT) + it("rejects an unsupported hash function type", function() + expect_corruption_error("unsupported hash function type", function(log_data) + log_data.hash_function = 0xffff end) end) - it("should reject log truncated in the middle of page data", function() - -- Truncate one byte into the first page entry - should_fail("page data past end of step log", function(log_data) - return log_data:sub(1, OFFSET_FIRST_PAGE + 1) + it("rejects a logged requested_cycle_count that disagrees with the argument", function() + expect_corruption_error("mcycle count mismatch", function(log_data) + log_data.requested_cycle_count = MCYCLE_COUNT + 1 end) end) - it("should reject log truncated before sibling_count", function() - -- Parse page_count to find where sibling_count should be - should_fail("sibling count past end of step log", function(log_data) - local page_count = string.unpack(" 0, "test requires at least one sibling hash") - -- Keep only the sibling_count field but truncate the sibling hashes - return log_data:sub(1, sibling_count_offset + 8 + 1) + it("rejects node_count overflow against file size", function() + expect_corruption_error("node count exceeds step log size", function(log_data) + log_data.override_node_count = cartesi.MCYCLE_MAX end) end) - end) - describe("constructor: format validation errors", function() - it("should reject unsupported hash function type", 
function() - should_fail("unsupported hash function type", function(log_data) - -- Replace hash_function field (at offset 72) with an invalid value (0xFF) - local bad_hash_fn = string.pack(" = cartesi.machine(config, {}) - local root_hash_before = log_machine:get_root_hash() - - -- Produce a valid step log to learn which pages are needed - local log_filename = os.tmpname() - os.remove(log_filename) - local mcycle_count = 1 - log_machine:log_step(mcycle_count, log_filename) - local root_hash_after = log_machine:get_root_hash() - - local f = assert(io.open(log_filename, "rb")) - local log_data = f:read("a") - f:close() - os.remove(log_filename) - - -- Parse pages from the valid log - local pages = parse_pages(log_data) - assert(#pages >= 2, "test requires at least 2 pages") - - -- Remove the last page -- the interpreter will still need it - local reduced_pages = {} - for i = 1, #pages - 1 do - reduced_pages[i] = pages[i] - end + describe("sibling validation", function() + it("rejects an extra page that consumes a missing sibling slot", function() + expect_corruption_error("too many sibling hashes in log", function(log_data) + local last = log_data.pages[#log_data.pages] + table.insert(log_data.pages, { + index = last.index + 1, + data = last.data, + scratch_hash = last.scratch_hash, + }) + end, { root_hash_after = BAD_HASH }) + end) - -- Collect page indices for the reduced set - local reduced_indices = {} - for i, p in ipairs(reduced_pages) do - reduced_indices[i] = p.index - end + it("rejects a removed page that frees a sibling slot", function() + expect_corruption_error("too many sibling hashes in log", function(log_data) + table.remove(log_data.pages) + end, { root_hash_after = BAD_HASH }) + end) - -- Use a fresh machine (still in initial state) to compute siblings - local fresh_machine = cartesi.machine(config, {}) - local siblings = get_siblings_for_pages(fresh_machine, reduced_indices) + it("rejects a removed sibling hash", function() + 
expect_corruption_error("too few sibling hashes in log", function(log_data) + table.remove(log_data.siblings) + end, { root_hash_after = BAD_HASH }) + end) - -- Read hash_function from the original log - local hash_function = string.unpack("= 2, "test needs a machine that produces at least 2 pages per step") + + table.remove(log.pages) + local kept_indices = {} + for i, p in ipairs(log.pages) do + kept_indices[i] = p.index + end + local fresh = build_machine() + log.siblings = get_siblings_for_pages(fresh, kept_indices) + + local adversarial = os.tmpname() + os.remove(adversarial) + test_util.write_step_log_file(log, adversarial) + local ok, err = pcall(function() + cartesi.machine:verify_step(root_hash_before, adversarial, MCYCLE_COUNT, root_hash_after) + end) + os.remove(adversarial) expect.falsy(ok) expect.truthy(err and err:find("required page not found", 1, true), err) end) end) describe("interpret: corrupted PMA data", function() - it("should reject log with invalid PMA entries", function() - -- Craft a log where the PMA page has garbage istart/ilength. - -- The Merkle tree is self-consistent (root hash matches the - -- corrupted data), but make_mock_address_range rejects the values. 
- local config = { ram = { length = 0x20000 } } - local log_machine = cartesi.machine(config, {}) - - local log_filename = os.tmpname() - os.remove(log_filename) - local mcycle_count = 1 - log_machine:log_step(mcycle_count, log_filename) - local root_hash_after = log_machine:get_root_hash() - - local f = assert(io.open(log_filename, "rb")) - local log_data = f:read("a") - f:close() - os.remove(log_filename) - - local pages = parse_pages(log_data) - local hash_function = string.unpack("> LOG2_PAGE_SIZE - local pma_page_pos = nil - for i, p in ipairs(pages) do + local pma_pos + for i, p in ipairs(log.pages) do if p.index == pma_page_index then - pma_page_pos = i + pma_pos = i break end end - assert(pma_page_pos, "PMA page not found in log") - - -- Corrupt the PMA page: fill with 0xFF so istart/ilength are garbage - pages[pma_page_pos].data = string.rep("\xff", PAGE_SIZE) - - -- Compute the new root hash by walking up from the corrupted page. - -- Get the proof path from the real machine (siblings at each level). - local fresh_machine = cartesi.machine(config, {}) - local pma_page_addr = pma_page_index << LOG2_PAGE_SIZE - -- Start with the Merkle tree hash of the corrupted page data - local node_hash = test_util.merkle_hash(pages[pma_page_pos].data, 0, LOG2_PAGE_SIZE, hash_fn) - -- Walk up from page level to root, combining with sibling hashes + assert(pma_pos, "PMA page not found in log -- machine config must touch it") + + log.pages[pma_pos].data = string.rep("\xff", PAGE_SIZE) + + -- Walk corrupted PMA page up to root, picking siblings from a + -- pristine machine to keep the merkle tree self-consistent. 
+ local fresh = build_machine() + local pma_addr = pma_page_index << LOG2_PAGE_SIZE + local node_hash = test_util.merkle_hash(log.pages[pma_pos].data, 0, LOG2_PAGE_SIZE, hash_fn) for log2_size = LOG2_PAGE_SIZE, LOG2_ROOT_SIZE - 1 do local bit = 1 << log2_size - local sibling_addr = (pma_page_addr ~ bit) & ~(bit - 1) - local sibling_hash = fresh_machine:get_node_hash(sibling_addr, log2_size) - if (pma_page_addr & bit) ~= 0 then - node_hash = cartesi[hash_fn](sibling_hash, node_hash) + local sibling_addr = (pma_addr ~ bit) & ~(bit - 1) + local sibling = fresh:get_node_hash(sibling_addr, log2_size) + if (pma_addr & bit) ~= 0 then + node_hash = cartesi[hash_fn](sibling, node_hash) else - node_hash = cartesi[hash_fn](node_hash, sibling_hash) + node_hash = cartesi[hash_fn](node_hash, sibling) end end - local new_root_hash = node_hash + local new_root = node_hash - -- Build the log with the corrupted page and the new root hash - local page_indices = {} - for i, p in ipairs(pages) do - page_indices[i] = p.index + local indices = {} + for i, p in ipairs(log.pages) do + indices[i] = p.index end - local siblings = get_siblings_for_pages(fresh_machine, page_indices) - local adversarial_log = - build_step_log(new_root_hash, mcycle_count, root_hash_after, hash_function, pages, siblings) - - local ok, err = verify_step_with_data(new_root_hash, mcycle_count, root_hash_after, adversarial_log) - expect.falsy(ok) - -- The abrt lambda (L463) is invoked through ABRTF in the - -- address_range constructor, called from make_mock_address_range - expect.truthy(err and err:find("when initializing", 1, true), err) - end) - end) - - describe("compute_root_hash: page/sibling count errors", function() - it("should reject too many pages in log", function() - -- Add an extra page with index >= 2^52 (beyond the tree's page index - -- range). The tree walk never reaches it, so next_page < page_count. 
- should_fail("too many pages in log", function(log_data) - local page_count = string.unpack(" 0, "test requires at least one sibling hash") - -- Set sibling_count to 0 and remove all sibling hashes - local truncated = log_data:sub(1, sibling_count_offset) .. string.pack(". --- - ---[[ -Tests that verify_step_uarch and verify_send_cmio_response correctly reject -corrupted access logs. This exercises the validation error paths in -uarch-replay-state-access.h and replay-send-cmio-state-access.h. -]] - -local cartesi = require("cartesi") -local lester = require("cartesi.third-party.lester") -local test_util = require("cartesi.tests.util") - -local describe, it, expect = lester.describe, lester.it, lester.expect - -local uarch_test_path = test_util.tests_uarch_path - --- Uarch halt program: li a7,halt; ecall -local UARCH_HALT_INSN = (cartesi.UARCH_ECALL_FN_HALT << 20) | 0x00893 -local uarch_default_program = { - 0x07b00513, -- li a0,123 - UARCH_HALT_INSN, -- li a7,halt - 0x00000073, -- ecall -} - -local function create_test_uarch_program(instructions) - local file_path = os.tmpname() - local f = assert(io.open(file_path, "wb")) - for _, insn in pairs(instructions) do - f:write(string.pack("I4", insn)) - end - return file_path -end - --- A hash guaranteed to be wrong for any valid proof -local bad_hash = string.rep("\xba", cartesi.HASH_SIZE) - --- Build a machine with the default uarch test program (li a0,123; halt ecall) -local function build_default_machine() - local data_filename = create_test_uarch_program(uarch_default_program) - local machine = cartesi.machine({ - ram = { length = 0x20000 }, - uarch = { - ram = { - length = 0x1000, - backing_store = { data_filename = data_filename }, - }, - }, - }, {}) - os.remove(data_filename) - return machine -end - --- Build a machine with the ecall-write-tlb test binary for TLB write tests -local function build_tlb_machine() - return cartesi.machine({ - ram = { length = 0x20000 }, - uarch = { - ram = { - backing_store = { - 
data_filename = uarch_test_path .. "/rv64ui-uarch-ecall-write-tlb.bin", - }, - }, - }, - }, {}) -end - --- Build a machine suitable for log_send_cmio_response -local function build_cmio_machine() - return cartesi.machine({ - ram = { length = 0x20000 }, - uarch = {}, - }, {}) -end - --- Helper: get a fresh step log, apply a corruption, verify it fails with expected error -local function step_should_fail(machine_builder, expected_error, corrupt) - local machine = machine_builder() - local initial_hash = machine:get_root_hash() - local log = machine:log_step_uarch() - local final_hash = machine:get_root_hash() - corrupt(log) - expect.fail(function() - machine:verify_step_uarch(initial_hash, log, final_hash) - end, expected_error) -end - --- Helper: get a fresh cmio log, apply a corruption, verify it fails with expected error -local function cmio_should_fail(expected_error, corrupt, options) - options = options or {} - local machine = build_cmio_machine() - machine:write_reg("iflags_Y", 1) - local reason = options.reason or 1 - local data = options.data or "test cmio data" - local initial_hash = machine:get_root_hash() - local log = machine:log_send_cmio_response(reason, data) - local final_hash = machine:get_root_hash() - corrupt(log) - expect.fail(function() - machine:verify_send_cmio_response(reason, data, initial_hash, log, final_hash) - end, expected_error) -end - --- Find the index of an access to a TLB slot (by address range) -local function find_tlb_access(log) - for i = 1, #log.accesses do - local a = log.accesses[i] - if - a.address >= cartesi.AR_SHADOW_TLB_START - and a.address < cartesi.AR_SHADOW_TLB_START + cartesi.AR_SHADOW_TLB_LENGTH - then - return i - end - end - return nil -end - --- Helper: step through the TLB test until we find a TLB write, corrupt it, verify failure -local function tlb_step_should_fail(expected_error, corrupt) - local machine = build_tlb_machine() - for _ = 1, 46 do - local initial_hash = machine:get_root_hash() - local log = 
machine:log_step_uarch() - local final_hash = machine:get_root_hash() - local tlb_idx = find_tlb_access(log) - if tlb_idx then - corrupt(log, tlb_idx) - expect.fail(function() - machine:verify_step_uarch(initial_hash, log, final_hash) - end, expected_error) - return - end - if machine:read_reg("uarch_halt_flag") ~= 0 then - break - end - end - error("did not find a TLB write access in the ecall-write-tlb test") -end - --------------------------------------------------------------------------------- - -describe("verify_step_uarch", function() - describe("basic step", function() - it("should reject empty access log", function() - step_should_fail(build_default_machine, "log is missing access", function(log) - log.accesses = {} - end) - end) - - it("should reject extra access at end", function() - step_should_fail(build_default_machine, "access log was not fully consumed", function(log) - log.accesses[#log.accesses + 1] = log.accesses[1] - end) - end) - - it("should reject wrong type on 1st read access", function() - step_should_fail(build_default_machine, "expected 1st access to read uarch.cycle", function(log) - log.accesses[1].type = "write" - end) - end) - - it("should reject unexpected written data on read access", function() - step_should_fail(build_default_machine, "unexpected written data in 1st access read access", function(log) - log.accesses[1].written = log.accesses[1].read - end) - end) - - it("should reject unexpected written hash on read access", function() - step_should_fail(build_default_machine, "unexpected written hash in 1st access read access", function(log) - log.accesses[1].written_hash = log.accesses[1].read_hash - end) - end) - - it("should reject wrong address on read access", function() - step_should_fail(build_default_machine, "expected 1st access to read uarch.cycle at address", function(log) - log.accesses[1].address = 0 - end) - end) - - it("should reject wrong log2_size on read access", function() - 
step_should_fail(build_default_machine, "expected 1st access to uarch.cycle to read 2^", function(log) - log.accesses[1].log2_size = 2 - end) - end) - - it("should reject corrupt sibling hash on read access", function() - step_should_fail( - build_default_machine, - "siblings and read hash do not match root hash before 1st access", - function(log) - log.accesses[1].sibling_hashes[1] = bad_hash - end - ) - end) - - it("should reject missing read data", function() - step_should_fail(build_default_machine, "missing read data for uarch.cycle in 1st access", function(log) - log.accesses[1].read = nil - end) - end) - - it("should reject corrupt read data", function() - step_should_fail( - build_default_machine, - "read data for uarch.cycle does not match read hash in 1st access", - function(log) - log.accesses[1].read = string.rep("\xff", #log.accesses[1].read) - end - ) - end) - - it("should reject wrong type on last write access", function() - step_should_fail(build_default_machine, "expected", function(log) - log.accesses[#log.accesses].type = "read" - end) - end) - - it("should reject wrong address on write access", function() - step_should_fail(build_default_machine, "at address", function(log) - log.accesses[#log.accesses].address = 0 - end) - end) - - it("should reject wrong log2_size on write access", function() - step_should_fail(build_default_machine, "to write 2^", function(log) - log.accesses[#log.accesses].log2_size = 2 - end) - end) - - it("should reject corrupt siblings on write access", function() - step_should_fail(build_default_machine, "siblings and read hash do not match root hash", function(log) - log.accesses[#log.accesses].sibling_hashes[1] = bad_hash - end) - end) - - it("should reject missing read data on write access", function() - step_should_fail(build_default_machine, "missing read data for", function(log) - log.accesses[#log.accesses].read = nil - end) - end) - - it("should reject corrupt read data on write access", function() - 
step_should_fail(build_default_machine, "read data for", function(log) - log.accesses[#log.accesses].read = string.rep("\xff", #log.accesses[#log.accesses].read) - end) - end) - - it("should reject missing written_hash on write access", function() - step_should_fail(build_default_machine, "missing written hash of", function(log) - log.accesses[#log.accesses].written_hash = nil - end) - end) - - it("should reject wrong written_hash on write access", function() - step_should_fail(build_default_machine, "written hash for", function(log) - log.accesses[#log.accesses].written_hash = bad_hash - end) - end) - - it("should reject corrupt written data on write access", function() - step_should_fail(build_default_machine, "written data for", function(log) - log.accesses[#log.accesses].written = string.rep("\xff", #log.accesses[#log.accesses].written) - end) - end) - - it("should reject wrong type on 2nd access", function() - step_should_fail(build_default_machine, "expected 2nd access to", function(log) - log.accesses[2].type = "write" - end) - end) - - it("should reject wrong type on 3rd access", function() - step_should_fail(build_default_machine, "expected 3rd access to", function(log) - log.accesses[3].type = "write" - end) - end) - - it("should reject wrong type on 4th access", function() - step_should_fail(build_default_machine, "expected 4th access to", function(log) - if log.accesses[4].type == "read" then - log.accesses[4].type = "write" - else - log.accesses[4].type = "read" - end - end) - end) - - it("should reject wrong root_hash_after", function() - local machine = build_default_machine() - local initial_hash = machine:get_root_hash() - local log = machine:log_step_uarch() - expect.fail(function() - machine:verify_step_uarch(initial_hash, log, bad_hash) - end, "mismatch in root hash after replay") - end) - end) - - describe("TLB write via ecall", function() - it("should reject wrong type", function() - tlb_step_should_fail("expected", function(log, idx) - 
log.accesses[idx].type = "read" - end) - end) - - it("should reject wrong address", function() - tlb_step_should_fail("at address", function(log, idx) - log.accesses[idx].address = 0 - end) - end) - - it("should reject corrupt siblings", function() - tlb_step_should_fail("siblings and read hash do not match root hash", function(log, idx) - log.accesses[idx].sibling_hashes[1] = bad_hash - end) - end) - - it("should reject missing written_hash", function() - tlb_step_should_fail("missing written hash of tlb.slot", function(log, idx) - log.accesses[idx].written_hash = nil - end) - end) - - it("should reject wrong written_hash", function() - tlb_step_should_fail("written hash for tlb.slot does not match expected hash", function(log, idx) - log.accesses[idx].written_hash = bad_hash - end) - end) - - it("should reject corrupt read data", function() - tlb_step_should_fail("read data for tlb.slot does not match read hash", function(log, idx) - local size = 1 << log.accesses[idx].log2_size - if log.accesses[idx].read then - log.accesses[idx].read = string.rep("\xff", #log.accesses[idx].read) - else - log.accesses[idx].read = string.rep("\xff", size) - end - end) - end) - - it("should reject corrupt written data", function() - tlb_step_should_fail("written data for tlb.slot does not match written hash", function(log, idx) - local size = 1 << log.accesses[idx].log2_size - if log.accesses[idx].written then - log.accesses[idx].written = string.rep("\xff", #log.accesses[idx].written) - else - log.accesses[idx].written = string.rep("\xff", size) - end - end) - end) - end) -end) - --- The access pattern for send_cmio_response with non-empty data is: --- 1: read iflags.Y (check_read) --- 2: write cmio rx buffer (do_write_memory_with_padding) --- 3: write htif.fromhost (check_write) --- 4: write iflags.Y (check_write) - -describe("verify_send_cmio_response", function() - describe("log structure", function() - it("should reject empty access log", function() - cmio_should_fail("the 
access log has no accesses", function(log) - log.accesses = {} - end) - end) - - it("should reject extra access at end", function() - cmio_should_fail("access log was not fully consumed", function(log) - log.accesses[#log.accesses + 1] = log.accesses[1] - end) - end) - - it("should reject truncated log (missing last access)", function() - cmio_should_fail("too few accesses in log", function(log) - log.accesses[#log.accesses] = nil - end) - end) - - it("should reject truncated log (missing buffer write)", function() - -- Keep only the first access (read iflags.Y), so do_write_memory_with_padding - -- hits "too few accesses in log" - cmio_should_fail("too few accesses in log", function(log) - while #log.accesses > 1 do - log.accesses[#log.accesses] = nil - end - end) - end) - - it("should reject truncated log (missing check_read)", function() - -- With zero-length data, accesses are: read iflags.Y, write htif.fromhost, write iflags.Y - -- Keep only the first two so check_write for iflags.Y hits "too few accesses" - cmio_should_fail("too few accesses in log", function(log) - log.accesses[#log.accesses] = nil - end, { data = "" }) - end) - - it("should reject wrong root_hash_after", function() - local machine = build_cmio_machine() - machine:write_reg("iflags_Y", 1) - local reason = 1 - local data = "test" - local initial_hash = machine:get_root_hash() - local log = machine:log_send_cmio_response(reason, data) - expect.fail(function() - machine:verify_send_cmio_response(reason, data, initial_hash, log, bad_hash) - end, "mismatch in root hash after replay") - end) - end) - - describe("check_read (access 1: read iflags.Y)", function() - it("should reject wrong type", function() - cmio_should_fail("expected 1st access to read iflags.Y", function(log) - log.accesses[1].type = "write" - end) - end) - - it("should reject wrong address", function() - cmio_should_fail("expected 1st access to read iflags.Y address", function(log) - log.accesses[1].address = 0 - end) - end) - - 
it("should reject wrong log2_size", function() - cmio_should_fail("expected 1st access to read 2^", function(log) - log.accesses[1].log2_size = 2 - end) - end) - - it("should reject missing read data", function() - cmio_should_fail("missing read iflags.Y data at 1st access", function(log) - log.accesses[1].read = nil - end) - end) - - it("should reject wrong read data size", function() - cmio_should_fail("expected read iflags.Y data to contain 2^", function(log) - log.accesses[1].read = "\0" - end) - end) - - it("should reject read data that does not hash to read_hash", function() - cmio_should_fail("logged read data of iflags.Y data does not hash to the logged read hash", function(log) - log.accesses[1].read = string.rep("\xff", #log.accesses[1].read) - end) - end) - - it("should reject corrupt sibling hash", function() - cmio_should_fail("Mismatch in root hash of 1st access", function(log) - log.accesses[1].sibling_hashes[1] = bad_hash - end) - end) - end) - - describe("do_write_memory_with_padding (access 2: write cmio rx buffer)", function() - it("should reject wrong type", function() - cmio_should_fail("expected 2nd access to write cmio rx buffer", function(log) - log.accesses[2].type = "read" - end) - end) - - it("should reject wrong address", function() - cmio_should_fail("expected address of 2nd access to match address of cmio rx buffer", function(log) - log.accesses[2].address = 0 - end) - end) - - it("should reject wrong log2_size", function() - cmio_should_fail("expected 2nd access to write 2^", function(log) - log.accesses[2].log2_size = 2 - end) - end) - - it("should reject corrupt read data", function() - cmio_should_fail("hash of read data and read hash at 2nd access does not match", function(log) - local size = 1 << log.accesses[2].log2_size - log.accesses[2].read = string.rep("\xff", size) - end) - end) - - it("should reject missing written_hash", function() - cmio_should_fail("write 2nd access has no written hash", function(log) - 
log.accesses[2].written_hash = nil - end) - end) - - it("should reject wrong written_hash", function() - cmio_should_fail( - "logged written hash of cmio rx buffer does not match the hash of data argument", - function(log) - log.accesses[2].written_hash = bad_hash - end - ) - end) - - it("should reject corrupt written data", function() - cmio_should_fail("written hash and written data mismatch at 2nd access", function(log) - local size = 1 << log.accesses[2].log2_size - log.accesses[2].written = string.rep("\xff", size) - end) - end) - - it("should reject corrupt sibling hash", function() - cmio_should_fail("Mismatch in root hash of 2nd access", function(log) - log.accesses[2].sibling_hashes[1] = bad_hash - end) - end) - end) - - describe("check_write (access 3: write htif.fromhost)", function() - it("should reject wrong type", function() - cmio_should_fail("expected 3rd access to write htif.fromhost", function(log) - log.accesses[3].type = "read" - end) - end) - - it("should reject wrong address", function() - cmio_should_fail("expected 3rd access to write htif.fromhost to address", function(log) - log.accesses[3].address = 0 - end) - end) - - it("should reject wrong log2_size", function() - cmio_should_fail("expected 3rd access to write 2^", function(log) - log.accesses[3].log2_size = 2 - end) - end) - - it("should reject missing read data", function() - cmio_should_fail("missing read htif.fromhost data at 3rd access", function(log) - log.accesses[3].read = nil - end) - end) - - it("should reject wrong read data size", function() - cmio_should_fail("expected overwritten data from htif.fromhost to contain 2^", function(log) - log.accesses[3].read = "\0" - end) - end) - - it("should reject read data that does not hash to read_hash", function() - cmio_should_fail("logged read data of htif.fromhost does not hash to the logged read hash", function(log) - log.accesses[3].read = string.rep("\xff", #log.accesses[3].read) - end) - end) - - it("should reject missing 
written_hash", function() - cmio_should_fail("missing written htif.fromhost hash at 3rd access", function(log) - log.accesses[3].written_hash = nil - end) - end) - - it("should reject missing written data", function() - cmio_should_fail("missing written htif.fromhost data at 3rd access", function(log) - log.accesses[3].written = nil - end) - end) - - it("should reject wrong written data size", function() - cmio_should_fail("expected written htif.fromhost data to contain 2^", function(log) - log.accesses[3].written = "\0" - end) - end) - - it("should reject written data that does not hash to written_hash", function() - cmio_should_fail( - "logged written data of htif.fromhost does not hash to the logged written hash", - function(log) - log.accesses[3].written = string.rep("\xff", #log.accesses[3].written) - end - ) - end) - - it("should reject value that does not match logged written value", function() - cmio_should_fail("value being written to htif.fromhost does not match", function(log) - local a = log.accesses[3] - local new_written = string.rep("\x42", #a.written) - a.written = new_written - a.written_hash = cartesi.keccak256(new_written) - end) - end) - - it("should reject written data that differs from read in unexpected way", function() - cmio_should_fail("doesn't differ from the logged read data only by the written word", function(log) - local a = log.accesses[3] - -- htif.fromhost is at offset 16 within the 32-byte leaf, - -- so we corrupt byte 0 (outside the written word) while keeping the word intact - local corrupted = string.char(a.written:byte(1) ~ 0xff) .. 
a.written:sub(2) - a.written = corrupted - a.written_hash = cartesi.keccak256(corrupted) - end) - end) - - it("should reject corrupt sibling hash", function() - cmio_should_fail("Mismatch in root hash of 3rd access", function(log) - log.accesses[3].sibling_hashes[1] = bad_hash - end) - end) - end) - - describe("ordinal coverage (4th access: write iflags.Y)", function() - it("should reject wrong type on 4th access", function() - cmio_should_fail("expected 4th access to write iflags.Y", function(log) - log.accesses[4].type = "read" - end) - end) - end) - - describe("zero-length data (no buffer write)", function() - it("should reject wrong type on 2nd access", function() - cmio_should_fail("expected 2nd access to write htif.fromhost", function(log) - log.accesses[2].type = "read" - end, { data = "" }) - end) - - it("should reject wrong type on 3rd access", function() - cmio_should_fail("expected 3rd access to write iflags.Y", function(log) - log.accesses[3].type = "read" - end, { data = "" }) - end) - end) -end) diff --git a/tests/lua/test-spec.lua b/tests/lua/test-spec.lua index 2c427a180..c9e7347ef 100755 --- a/tests/lua/test-spec.lua +++ b/tests/lua/test-spec.lua @@ -22,7 +22,6 @@ local lester = require("cartesi.third-party.lester") lester.parse_args() require("spec-verify-step-failure") -require("spec-verify-uarch-failure") require("spec-fuzzer-bugs") require("spec-hash-functions") require("spec-hash-tree") diff --git a/tests/lua/uarch-riscv-tests.lua b/tests/lua/uarch-riscv-tests.lua index 1ebe4c94c..c7a969a5f 100755 --- a/tests/lua/uarch-riscv-tests.lua +++ b/tests/lua/uarch-riscv-tests.lua @@ -17,9 +17,9 @@ -- local cartesi = require("cartesi") -local util = require("cartesi.util") local test_util = require("cartesi.tests.util") local parallel = require("cartesi.parallel") +local manifest_mod = require("cartesi.tests.step_log_manifest") -- Tests Cases -- format {"ram_image_file", number_of_uarch_cycles, expected_error_pattern} @@ -76,9 +76,9 @@ local 
riscv_tests = { { "rv64ui-uarch-xori.bin", 177 }, { "rv64ui-uarch-fence.bin", 12 }, { "rv64ui-uarch-ecall-putchar.bin", 14 }, - { "rv64ui-uarch-ecall-mark-page-dirty.bin", 15 }, { "rv64ui-uarch-ecall-write-tlb.bin", 46 }, { "rv64ui-uarch-ecall-unsupported.bin", 1, "unsupported ecall function" }, + { "rv64ui-uarch-ecall-removed-mark-page-dirty.bin", 1, "unsupported ecall function" }, { "rv64ui-uarch-ebreak.bin", 1, "uarch aborted" }, } @@ -98,14 +98,14 @@ where options are: --jobs= run N tests in parallel (default: 1, i.e., run tests sequentially) - --output-dir= - write json logs to this directory - --create-reset-uarch-log - create a json log file for a uarch reset operation - valid only for the json-step-logs command ---create-send-cmio-response-log - create a json log file for a send_cmio_response operation - valid only for the json-step-logs command + --output-dir= + destination directory for the recorded fixtures + (required for the record_* commands; each command writes one homogeneous + fixture set directly into ) + --per-cycle-logs + record_uarch_tests only: write one step log per uarch cycle into + //NNNNN.log instead of the per-program batched log. + Each per-test directory gets its own _manifest.csv with cycle rows. and command can be: run run test and report errors @@ -113,10 +113,14 @@ and command can be: list list tests selected by the test - json-step-logs - generate json log files for every step of the selected tests - the files are written to the directory specified by --output-dir - these log files are used by Solidity unit tests + record_uarch_tests + record one step log per uarch test into . Default granularity + is one log per whole test (batched); pass --per-cycle-logs to emit one log + per cycle instead. Writes /_manifest.csv with program rows + (batched mode) or per-test subdirectories with cycle manifests (per-cycle). 
+ + (uarch reset and send_cmio_response fixtures are machine-level dispute operations, + not uarch instruction steps; see tests/lua/record-send-cmio-response.lua and record-reset-uarch.lua) ]=], arg[0] )) @@ -125,10 +129,9 @@ end local test_path = test_util.tests_uarch_path local test_pattern = ".*" -local output_dir local jobs = 1 -local create_uarch_reset_log = false -local create_send_cmio_response_log = false +local output_dir +local per_cycle_logs = false local options = { { @@ -150,63 +153,53 @@ local options = { end, }, { - "^%-%-create%-reset%-uarch%-log$", - function(all) - if not all then - return false - end - create_uarch_reset_log = true - return true - end, - }, - { - "^%-%-create%-send%-cmio%-response%-log$", - function(all) - if not all then + "^%-%-test%-path%=(.*)$", + function(o) + if not o or #o < 1 then return false end - create_send_cmio_response_log = true + test_path = o return true end, }, { - "^%-%-output%-dir%=(.*)$", + "^%-%-test%=(.*)$", function(o) if not o or #o < 1 then return false end - output_dir = o + test_pattern = o return true end, }, { - "^%-%-test%-path%=(.*)$", + "^%-%-jobs%=([0-9]+)$", function(o) if not o or #o < 1 then return false end - test_path = o + jobs = assert(tonumber(o)) + assert(jobs and jobs >= 1, "invalid number of jobs") return true end, }, { - "^%-%-test%=(.*)$", + "^%-%-output%-dir%=(.*)$", function(o) if not o or #o < 1 then return false end - test_pattern = o + output_dir = o return true end, }, { - "^%-%-jobs%=([0-9]+)$", - function(o) - if not o or #o < 1 then + "^%-%-per%-cycle%-logs$", + function(all) + if not all then return false end - jobs = assert(tonumber(o)) - assert(jobs and jobs >= 1, "invalid number of jobs") + per_cycle_logs = true return true end, }, @@ -269,13 +262,6 @@ local FAILED_TEST_CASE_X = 3 -- If test fails, the value of this register contai local TEST_SUCCEEDED = 0xbe1e7aaa -- Value indicating that test has passed local TEST_FAILED = 0xdeadbeef -- Value indicating that test has 
failed -local function read_all(path) - local file = assert(io.open(path, "rb")) - local contents = file:read("*a") - file:close() - return contents -end - local function check_test_result(machine, ctx) local actual_cycle = machine:read_reg("uarch_cycle") if ctx.uarch_run_success then @@ -367,262 +353,153 @@ local function list(tests) end end -local function select_test(test_name, patt) - local i, j = test_name:find(patt) - if i == 1 and j == #test_name then - return true - end - i, j = test_name:find(patt, 1, true) - return i == 1 and j == #test_name -end - -local function make_json_log_file_name(test_name, suffix) - return test_name .. (suffix or "") .. ".json" +local function step_log_file_name(test_name) + return test_name .. ".log" end -local function create_json_log_file(test_name, suffix) - local file_path = output_dir .. "/" .. make_json_log_file_name(test_name, suffix) - return assert(io.open(file_path, "w"), "error opening file " .. file_path) -end - -local function open_steps_json_log(test_name) - return create_json_log_file(test_name, "-steps") +-- Manifest schema + parallel-fragment helpers live in cartesi.tests.step_log_manifest +-- (shared with the machine-level generator). The cmio `data` column stays ASCII; +-- see that module for the CSV-safety contract. + +-- Records a step log for one uarch test. Mutates ctx with the captured +-- root hashes; self-checks the recorded log via verify_step_uarch. +local function record_test_step_log(machine, ctx) + ctx.log_file = step_log_file_name(ctx.test_name) + ctx.kind = "program" + ctx.name = ctx.log_file + ctx.hash_function = "keccak256" + assert(os.execute("mkdir -p " .. output_dir)) + local log_path = output_dir .. "/" .. ctx.log_file + os.remove(log_path) + ctx.initial_root_hash = machine:get_root_hash() + -- 2x expected cycles so an overrun bug shows up in actual_cycle rather than + -- being clipped silently at the expected boundary. 
+ ctx.requested_cycle_count = 2 * ctx.expected_cycles + machine:log_step_uarch(ctx.requested_cycle_count, log_path) + ctx.final_root_hash = machine:get_root_hash() + ctx.uarch_run_success = true + cartesi.machine:verify_step_uarch(ctx.initial_root_hash, log_path, ctx.requested_cycle_count, ctx.final_root_hash) end -local function write_sibling_hashes_to_log(sibling_hashes, out, indent) - util.indentout(out, indent, '"sibling_hashes": [\n') - for i, h in ipairs(sibling_hashes) do - util.indentout(out, indent + 1, '"%s"', util.hexhash(h)) - if sibling_hashes[i + 1] then - out:write(",\n") +-- Records one step log per uarch cycle in //. +-- Runs cycle by cycle until the machine halts or uarch_cycle overflows. +-- Each log captures a single cycle's transition, matching the production dispute +-- path (one uarch_step per transition). +local function record_per_cycle_step_logs(ram_image, ctx) + local per_cycle_dir = ctx.test_name + local dir_abs = output_dir .. "/" .. per_cycle_dir + os.execute("rm -rf " .. dir_abs) + assert(os.execute("mkdir -p " .. dir_abs), "mkdir " .. dir_abs) + local manifest = assert(io.open(dir_abs .. "/" .. manifest_mod.MANIFEST_NAME, "w")) + manifest:write(manifest_mod.HEADER) + local machine = build_machine(ram_image) + local cycle = 0 + local before_hash = machine:get_root_hash() + while true do + local cycle_name = string.format("%05d.log", cycle) + local cycle_path = dir_abs .. "/" .. 
cycle_name + local status = machine:log_step_uarch(1, cycle_path) + if status == cartesi.UARCH_BREAK_REASON_REACHED_TARGET_CYCLE then + local after_hash = machine:get_root_hash() + manifest_mod.write_row(manifest, { + kind = "cycle", + name = cycle_name, + hash_function = "keccak256", + requested_cycle_count = 1, + initial_root_hash = before_hash, + final_root_hash = after_hash, + }) + before_hash = after_hash + cycle = cycle + 1 else - out:write("\n") - end - end - util.indentout(out, indent, "]\n") -end - -local function write_access_to_log(access, out, indent, last) - util.indentout(out, indent, "{\n") - util.indentout(out, indent + 1, '"type": "%s",\n', access.type) - util.indentout(out, indent + 1, '"address": %u,\n', access.address) - util.indentout(out, indent + 1, '"log2_size": %u,\n', access.log2_size) - local read_value = "" -- Solidity JSON parser breaks, if this field is null - if access.read then - read_value = util.hexstring(access.read) - end - util.indentout(out, indent + 1, '"read_value": "%s",\n', read_value) - util.indentout(out, indent + 1, '"read_hash": "%s",\n', util.hexhash(access.read_hash)) - local written_value = "" - local written_hash = "" - if access.type == "write" then - written_hash = util.hexhash(access.written_hash) - if access.written then - written_value = util.hexstring(access.written) - end - end - util.indentout(out, indent + 1, '"written_value": "%s",\n', written_value) - util.indentout(out, indent + 1, '"written_hash": "%s"', written_hash) - if access.sibling_hashes then - out:write(",\n") - write_sibling_hashes_to_log(access.sibling_hashes, out, indent + 2) - else - out:write("\n") - end - util.indentout(out, indent, "}") - if not last then - out:write(",") - end - out:write("\n") -end - -local function write_log_to_file(log, out, indent, last) - local n = #log.accesses - util.indentout(out, indent, "{\n") - util.indentout(out, indent + 1, '"accesses": [\n') - for i, access in ipairs(log.accesses) do - 
write_access_to_log(access, out, indent + 2, i == n) - end - util.indentout(out, indent + 1, "]\n") - util.indentout(out, indent, "}") - if not last then - out:write(",") - end - out:write("\n") -end - -local function catalog_entry_file_name(name) - return output_dir .. "/" .. make_json_log_file_name(name, "-catalog-entry") -end - -local function write_catalog_json_log_entry(out, logFilename, ctx) - util.indentout( - out, - 1, - '{"logFilename": "%s", "binaryFilename": "%s", "steps": %d, ' - .. '"initialRootHash": "%s", "finalRootHash": "%s"}', - logFilename, - ctx.ram_image or "", - ctx.step_count, - util.hexhash(ctx.initial_root_hash), - util.hexhash(ctx.final_root_hash) - ) -end - -local function create_catalog_json_log_entry(ctx) - local out = create_json_log_file(ctx.test_name, "-catalog-entry") - local logFilename = make_json_log_file_name(ctx.test_name, "-steps") - write_catalog_json_log_entry(out, logFilename, ctx) - out:close() -end - -local function run_machine_writing_json_logs(machine, ctx) - local test_name = ctx.test_name - local max_cycle = ctx.expected_cycles * 2 - local out = open_steps_json_log(test_name) - local indent = 0 - util.indentout(out, indent, '{ "steps":[\n') - local step_count = 0 - while math.ult(machine:read_reg("uarch_cycle"), max_cycle) do - local log = machine:log_step_uarch() - step_count = step_count + 1 - local halted = machine:read_reg("uarch_halt_flag") ~= 0 - write_log_to_file(log, out, indent + 1, halted) - if halted then + -- The machine was already halted (or cycle overflowed): no transition happened. + -- Discard the no-op log so per-cycle replay only sees genuine cycles. + os.remove(cycle_path) break end end - ctx.step_count = step_count + -- Confirm the test passed, anchoring the fixture's validity at generation (the same + -- check the batched recorder runs); consumers then only reproduce the recorded roots. 
ctx.uarch_run_success = true - util.indentout(out, indent, "]}\n") - out:close() + check_test_result(machine, ctx) + ctx.per_cycle_dir = per_cycle_dir + ctx.actual_cycle_count = cycle end -local function create_json_reset_log() - local machine = build_machine() - local test_name = "reset-uarch" - machine:write_reg("uarch_halt_flag", 1) - local initial_root_hash = machine:get_root_hash() - local log = machine:log_reset_uarch() - local out = create_json_log_file(test_name .. "-steps") - write_log_to_file(log, out, 0, true) - out:close() - local ctx = { - initial_root_hash = initial_root_hash, - final_root_hash = machine:get_root_hash(), - ram_image = "", - test_name = test_name, - expected_cycles = 1, - step_count = 1, - failed = false, - accesses_count = #log.accesses, - } - return ctx -end +-- uarch reset and send_cmio_response are machine-level dispute operations, not uarch +-- instruction steps; their fixtures come from tests/lua/record-send-cmio-response.lua +-- and record-reset-uarch.lua. -local function create_json_send_cmio_response_log() - local machine = build_machine() - local test_name = "send-cmio-response" - local response_data = "This is a test cmio response" - local reason = 1 - machine:write_reg("iflags_Y", 1) - local initial_root_hash = machine:get_root_hash() - local log = machine:log_send_cmio_response(reason, response_data) - local out = create_json_log_file(test_name .. 
"-steps") - write_log_to_file(log, out, 0, true) - out:close() - local ctx = { - initial_root_hash = initial_root_hash, - final_root_hash = machine:get_root_hash(), - ram_image = "", - test_name = test_name, - expected_cycles = 1, - step_count = 1, - failed = false, - accesses_count = #log.accesses, - } - return ctx -end - -local function json_step_logs(tests) - assert(output_dir, "output-dir is required for json-logs") - -- filter out tests that intentionally produce runtime errors - -- They represent bug conditions that are not supposed to be logged +-- Record one binary step log per uarch test into . Granularity +-- chosen by --per-cycle-logs: default writes one log per whole test (batched); +-- with the flag, writes one log per cycle into //. +-- Tests with an expected_error_pattern (runtime-error tests) are skipped. +local function record_uarch_tests(tests) + assert(output_dir, "--output-dir is required for record_uarch_tests") local loggable_tests = {} for _, test in ipairs(tests) do - local expected_error_pattern = test[3] - if not expected_error_pattern then + if not test[3] then loggable_tests[#loggable_tests + 1] = test end end - -- note: function may run in a separate process + if per_cycle_logs then + -- Per-cycle mode: each test produces //.log + -- plus its own _manifest.csv. No top-level manifest; consumers discover + -- per-test dirs by iterating the matching uarch-tests manifest. 
+ local failures = parallel.run(loggable_tests, jobs, function(test) + local ctx = { + ram_image = test[1], + test_name = test[1]:gsub("%.bin$", ""), + expected_cycles = test[2], + } + record_per_cycle_step_logs(ctx.ram_image, ctx) + end) + if failures ~= nil and failures > 0 then + stderr("\nFAILED %d of %d tests\n\n", failures, #loggable_tests) + os.exit(1) + end + stderr("\nPASSED all %d tests (per-cycle)\n\n", #loggable_tests) + os.exit(0) + end + + -- Batched mode: each test produces /.log; manifest + -- rows accumulate from worker fragments, then merge into /_manifest.csv. local failures = parallel.run(loggable_tests, jobs, function(test) local ctx = { ram_image = test[1], - test_name = test[1]:gsub(".bin$", ""), + test_name = test[1]:gsub("%.bin$", ""), expected_cycles = test[2], - failed = true, - step_count = 0, - accesses_count = 0, + uarch_run_success = false, } local machine = build_machine(ctx.ram_image) - ctx.initial_root_hash = machine:get_root_hash() - run_machine_writing_json_logs(machine, ctx) - ctx.final_root_hash = machine:get_root_hash() + record_test_step_log(machine, ctx) check_test_result(machine, ctx) - create_catalog_json_log_entry(ctx) + manifest_mod.write_fragment(output_dir, ctx.test_name, ctx) end) - - -- create additional logs not in the `tests` list - local contexts = {} - if create_uarch_reset_log then - local ctx = create_json_reset_log() - contexts[#contexts + 1] = ctx - end - if create_send_cmio_response_log then - local ctx = create_json_send_cmio_response_log() - contexts[#contexts + 1] = ctx + if failures ~= nil and failures > 0 then + stderr("\nFAILED %d of %d tests\n\n", failures, #loggable_tests) + os.exit(1) end - -- build catalog - - -- gather catalog entries from files - local out = create_json_log_file("catalog") - out:write("[\n") + assert(os.execute("mkdir -p " .. 
output_dir)) + local test_names = {} for _, test in ipairs(loggable_tests) do - local test_name = test[1]:gsub(".bin$", "") - local filename = catalog_entry_file_name(test_name) - local contents = read_all(filename) - out:write(contents) - out:write(",\n") - os.remove(filename) + test_names[#test_names + 1] = test[1]:gsub("%.bin$", "") end + manifest_mod.concat_fragments(output_dir, test_names) - -- gather remaining entries - for i, ctx in ipairs(contexts) do - local logFilename = make_json_log_file_name(ctx.test_name, "-steps") - write_catalog_json_log_entry(out, logFilename, ctx) - if i == #contexts then - out:write("\n") - else - out:write(",\n") - end - end - - out:write("]\n") - out:close() + stderr("\nPASSED all %d tests\n\n", #loggable_tests) + os.exit(0) +end - -- print summary - if failures ~= nil then - if failures > 0 then - stderr("\nFAILED %d of %d tests\n\n", failures, #loggable_tests) - os.exit(1) - else - stderr("\nPASSED all %d tests\n\n", #loggable_tests) - os.exit(0) - end +local function select_test(test_name, patt) + local i, j = test_name:find(patt) + if i == 1 and j == #test_name then + return true end + i, j = test_name:find(patt, 1, true) + return i == 1 and j == #test_name end local selected_tests = {} @@ -638,8 +515,8 @@ elseif command == "run" then run(selected_tests) elseif command == "list" then list(selected_tests) -elseif command == "json-step-logs" then - json_step_logs(selected_tests) +elseif command == "record_uarch_tests" then + record_uarch_tests(selected_tests) else error("command not found") end diff --git a/tests/machine/src/step_max_pages.S b/tests/machine/src/step_max_pages.S new file mode 100644 index 000000000..af6592b1b --- /dev/null +++ b/tests/machine/src/step_max_pages.S @@ -0,0 +1,140 @@ +/* Copyright Cartesi and individual authors (see AUTHORS) + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Worst-case step-log page footprint for a single mcycle. + * + * One mcycle records every distinct 4 KiB page the interpreter reads or writes + * while executing one big-machine instruction. This program maximizes that set: + * it runs a load in S-mode under sv48 (4-level page walk -- the deepest mode the + * Cartesi machine supports; sv57 is compiled out via NO_SATP_MODE_SV57), with the + * code and data virtual addresses routed through disjoint page-table pages, and an + * sfence.vma immediately before the load so the TLB is cold and both the fetch and + * the load perform full table walks during the recorded mcycle. + * + * vaddr layout (sv48): VPN3|VPN2|VPN1|VPN0|offset, 9 bits each + 12. + * code vaddr Vc = 1<<39 (VPN3=1, rest 0) + * data vaddr Vd = 2<<39 (VPN3=2, rest 0) + * Sharing only the root table page, each walk touches 3 further distinct table + * pages, so fetch + load span 7 table pages + 1 code page + 1 data page, plus the + * shadow-registers page, the PMA page, and the CODE/READ TLB shadow pages. 
+ */ + +#include +#include + +#define SATP_MODE_SHIFT 60 +#define MSTATUS_MPP_MASK 0x1800 +#define MSTATUS_MPP_S 0x800 + +#define VC (1 << 39) +#define VD (2 << 39) + +#define PTE_PTR PTE_V +#define PTE_LEAF_RX (PTE_V | PTE_R | PTE_X | PTE_A | PTE_D) +#define PTE_LEAF_RW (PTE_V | PTE_R | PTE_W | PTE_A | PTE_D) + +/* write a PTE: parent[idx] = (phys(target) >> 12 << 10) | flags */ +.macro set_pte parent, idx, target, flags + la t1, \target + srli t1, t1, 12 + slli t1, t1, 10 + li t2, \flags + or t1, t1, t2 + la t0, \parent + li t3, (\idx) * 8 + add t0, t0, t3 + sd t1, (t0) +.endm + +.section .text.init +.align 2; +.global _start; +_start: + // M-mode trap handler (catches the supervisor ecall used to exit) + la t0, m_trap + csrw mtvec, t0 + + // Build the sv48 page tables. + // root[1] -> l1c ; root[2] -> l1d (diverge at the root index VPN3) + set_pte root_table, 1, l1c, PTE_PTR + set_pte root_table, 2, l1d, PTE_PTR + // code chain: l1c[0]->l2c->l3c->code_page (l3c[0] is the leaf) + set_pte l1c, 0, l2c, PTE_PTR + set_pte l2c, 0, l3c, PTE_PTR + set_pte l3c, 0, code_page, PTE_LEAF_RX + // data chain: l1d[0]->l2d->l3d->data_page (l3d[0] is the leaf) + set_pte l1d, 0, l2d, PTE_PTR + set_pte l2d, 0, l3d, PTE_PTR + set_pte l3d, 0, data_page, PTE_LEAF_RW + + // satp = sv48 | (root_table >> 12) + la t0, root_table + srli t0, t0, 12 + li t1, (SATP_MODE_SV48 << SATP_MODE_SHIFT) + or t0, t0, t1 + csrw satp, t0 + + // Enter S-mode at Vc with a0 = Vd. + li a0, VD + li t0, MSTATUS_MPP_MASK + csrc mstatus, t0 + li t0, MSTATUS_MPP_S + csrs mstatus, t0 + li t0, VC + csrw mepc, t0 + mret + +// M-mode trap: supervisor ecall exits 0, anything else exits 1. +m_trap: + csrr t0, mcause + li t1, CAUSE_SUPERVISOR_ECALL + beq t0, t1, exit_ok + li gp, 1 + j exit +exit_ok: + li gp, 0 + j exit + +// HTIF exit, code in gp. 
+exit: + slli gp, gp, 16 + srli gp, gp, 15 + ori gp, gp, 1 +1: + li t0, AR_HTIF_START_DEF + sd gp, 0(t0) + j 1b + +// The S-mode payload, page-aligned so Vc (offset 0) lands on its first insn. +.balign 4096 +.global code_page +code_page: + sfence.vma // flush TLB: the next mcycle re-walks cold + ld x1, 0(a0) // worst mcycle: cold fetch walk + cold load walk + ecall // trap to M-mode to exit +1: j 1b + +// sv48 table pages and the data page (zeroed scratch; filled at runtime). +.section .bss +.balign 4096 +root_table: .skip 4096 +l1c: .skip 4096 +l2c: .skip 4096 +l3c: .skip 4096 +l1d: .skip 4096 +l2d: .skip 4096 +l3d: .skip 4096 +data_page: .skip 4096 diff --git a/tests/machine/src/step_max_pages_flush.S b/tests/machine/src/step_max_pages_flush.S new file mode 100644 index 000000000..7b3b3de0b --- /dev/null +++ b/tests/machine/src/step_max_pages_flush.S @@ -0,0 +1,67 @@ +/* Copyright Cartesi and individual authors (see AUTHORS) + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Worst-case step-log page footprint for a single mcycle: a TLB flush. + * + * When sfence.vma (or a satp write) flushes the write TLB, the recorder marks + * every valid write entry's page dirty and records it. 
The write TLB has 256 + * slots (slot = (vaddr>>12) & 255), so first dirtying 256 distinct pages on 256 + * distinct slots and then flushing makes one mcycle touch all 256 of them plus + * the 6 TLB shadow pages, the shadow-registers page, and the PMA page. + * + * This needs no virtual memory: in M-mode (bare) the TLB still caches the + * identity mapping, so the stores populate the write TLB directly. + */ + +#include + +#define NPAGES 256 +#define BASE 0x80100000 // 1 MiB into RAM, clear of this code + +/* Stride between dirtied pages. The default (one page) keeps them contiguous; + * a stride of 257 pages (0x101000) still lands each on a distinct write-TLB slot + * (slot = (vaddr>>12) & 255) but scatters them ~1 MiB apart to inflate the merkle + * sibling count. Override with -DSTRIDE_BYTES=... ; the larger stride needs a RAM + * large enough to hold BASE + NPAGES*STRIDE_BYTES. */ +#ifndef STRIDE_BYTES +#define STRIDE_BYTES 0x1000 +#endif + +.section .text.init +.align 2; +.global _start; +_start: + li a0, BASE + li a1, NPAGES + li a2, 0 + li a3, STRIDE_BYTES +1: // dirty NPAGES distinct pages -> NPAGES distinct write-TLB slots + sd a2, 0(a0) + add a0, a0, a3 + addi a2, a2, 1 + blt a2, a1, 1b + + sfence.vma // worst mcycle: flush records every dirty cached page + + li gp, 0 +exit: + slli gp, gp, 16 + srli gp, gp, 15 + ori gp, gp, 1 +1: + li t0, AR_HTIF_START_DEF + sd gp, 0(t0) + j 1b diff --git a/tests/misc/test-machine-c-api.cpp b/tests/misc/test-machine-c-api.cpp index 6f885d224..1df180097 100644 --- a/tests/misc/test-machine-c-api.cpp +++ b/tests/misc/test-machine-c-api.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1117,18 +1118,18 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(verify_hash_tree_basic_test, ordinary_machine_fix BOOST_CHECK(ret); } -BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_log_null_log_test, default_machine_fixture) { - cm_error error_code = cm_verify_step_uarch(nullptr, nullptr, nullptr, nullptr); 
+BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_filename_test, default_machine_fixture) { + cm_error error_code = cm_verify_step_uarch(nullptr, nullptr, nullptr, 0, nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_INVALID_ARGUMENT); std::string result = cm_get_last_error_message(); - std::string origin("invalid access log"); + std::string origin("invalid log_filename"); BOOST_CHECK_EQUAL(origin, result); } -class access_log_machine_fixture : public incomplete_machine_fixture { +class step_log_machine_fixture : public incomplete_machine_fixture { public: - access_log_machine_fixture() : _log_type(CM_ACCESS_LOG_TYPE_ANNOTATIONS) { + step_log_machine_fixture() { _machine_dir_path = (std::filesystem::temp_directory_path() / "661b6096c377cdc07756df488059f4407c8f4").string(); uint32_t test_uarch_ram[] = { @@ -1145,41 +1146,60 @@ class access_log_machine_fixture : public incomplete_machine_fixture { cm_create_new(dumped_config.c_str(), nullptr, nullptr, &_machine); } - ~access_log_machine_fixture() { + ~step_log_machine_fixture() { + for (const auto &f : _temp_logs) { + std::filesystem::remove(f); + } cm_delete(_machine); std::filesystem::remove_all(_machine_dir_path); std::filesystem::remove_all(_uarch_ram_path); } - access_log_machine_fixture(const access_log_machine_fixture &other) = delete; - access_log_machine_fixture(access_log_machine_fixture &&other) noexcept = delete; - access_log_machine_fixture &operator=(const access_log_machine_fixture &other) = delete; - access_log_machine_fixture &operator=(access_log_machine_fixture &&other) noexcept = delete; + step_log_machine_fixture(const step_log_machine_fixture &other) = delete; + step_log_machine_fixture(step_log_machine_fixture &&other) noexcept = delete; + step_log_machine_fixture &operator=(const step_log_machine_fixture &other) = delete; + step_log_machine_fixture &operator=(step_log_machine_fixture &&other) noexcept = delete; protected: + // Returns a fresh path; the fixture removes the file on 
destruction. The path is + // removed eagerly so cm_log_step_uarch's "file already exists" check passes. + std::string make_log_filename() { + auto path = + (std::filesystem::temp_directory_path() / ("step-log-" + std::to_string(_next_log_id++) + ".bin")).string(); + std::filesystem::remove(path); + _temp_logs.push_back(path); + return path; + } + std::string _machine_dir_path; const std::string _uarch_ram_path = "/tmp/test-uarch-ram.bin"; - const char *_access_log{}; - int _log_type{}; + std::vector _temp_logs; + int _next_log_id{}; }; -BOOST_FIXTURE_TEST_CASE_NOLINT(step_null_machine_test, access_log_machine_fixture) { - cm_error error_code = cm_log_step_uarch(nullptr, _log_type, &_access_log); +BOOST_FIXTURE_TEST_CASE_NOLINT(step_null_machine_test, step_log_machine_fixture) { + const auto filename = make_log_filename(); + cm_error error_code = cm_log_step_uarch(nullptr, 1, filename.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_INVALID_ARGUMENT); } -BOOST_FIXTURE_TEST_CASE_NOLINT(step_null_access_log_test, access_log_machine_fixture) { - cm_error error_code = cm_log_step_uarch(_machine, _log_type, nullptr); +BOOST_FIXTURE_TEST_CASE_NOLINT(step_null_filename_test, step_log_machine_fixture) { + cm_error error_code = cm_log_step_uarch(_machine, 1, nullptr, nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_INVALID_ARGUMENT); + + std::string result = cm_get_last_error_message(); + std::string origin("invalid log_filename"); + BOOST_CHECK_EQUAL(origin, result); } -BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_hash0_test, access_log_machine_fixture) { - cm_error error_code = cm_log_step_uarch(_machine, _log_type, &_access_log); +BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_hash0_test, step_log_machine_fixture) { + const auto filename = make_log_filename(); + cm_error error_code = cm_log_step_uarch(_machine, 1, filename.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); BOOST_CHECK_EQUAL(std::string(""), 
std::string(cm_get_last_error_message())); cm_hash hash1; - error_code = cm_verify_step_uarch(nullptr, nullptr, _access_log, &hash1); + error_code = cm_verify_step_uarch(nullptr, nullptr, filename.c_str(), 1, &hash1); BOOST_CHECK_EQUAL(error_code, CM_ERROR_INVALID_ARGUMENT); std::string result = cm_get_last_error_message(); @@ -1187,13 +1207,14 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_hash0_test, access_log_mac BOOST_CHECK_EQUAL(origin, result); } -BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_hash1_test, access_log_machine_fixture) { - cm_error error_code = cm_log_step_uarch(_machine, _log_type, &_access_log); +BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_hash1_test, step_log_machine_fixture) { + const auto filename = make_log_filename(); + cm_error error_code = cm_log_step_uarch(_machine, 1, filename.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); BOOST_CHECK_EQUAL(std::string(""), std::string(cm_get_last_error_message())); cm_hash hash0; - error_code = cm_verify_step_uarch(nullptr, &hash0, _access_log, nullptr); + error_code = cm_verify_step_uarch(nullptr, &hash0, filename.c_str(), 1, nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_INVALID_ARGUMENT); std::string result = cm_get_last_error_message(); @@ -1201,18 +1222,18 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_hash1_test, access_log_mac BOOST_CHECK_EQUAL(origin, result); } -BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_access_log_test, access_log_machine_fixture) { +BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_null_filename_with_hashes_test, step_log_machine_fixture) { cm_hash hash0; cm_hash hash1; - cm_error error_code = cm_verify_step_uarch(nullptr, &hash0, nullptr, &hash1); + cm_error error_code = cm_verify_step_uarch(nullptr, &hash0, nullptr, 0, &hash1); BOOST_CHECK_EQUAL(error_code, CM_ERROR_INVALID_ARGUMENT); std::string result = cm_get_last_error_message(); - std::string origin("invalid access log"); + std::string origin("invalid 
log_filename"); BOOST_CHECK_EQUAL(origin, result); } -BOOST_FIXTURE_TEST_CASE_NOLINT(log_step_uarch_until_halt, access_log_machine_fixture) { +BOOST_FIXTURE_TEST_CASE_NOLINT(log_step_uarch_until_halt, step_log_machine_fixture) { cm_hash hash0{}; cm_hash hash1{}; cm_hash hash2{}; @@ -1238,42 +1259,39 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(log_step_uarch_until_halt, access_log_machine_fix BOOST_REQUIRE_EQUAL(error_code, CM_ERROR_OK); // step 1 - error_code = cm_log_step_uarch(_machine, _log_type, &_access_log); + const auto filename1 = make_log_filename(); + error_code = cm_log_step_uarch(_machine, 1, filename1.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); - // get hash after step error_code = cm_get_root_hash(_machine, &hash1); BOOST_REQUIRE_EQUAL(error_code, CM_ERROR_OK); - // verify - error_code = cm_verify_step_uarch(nullptr, &hash0, _access_log, &hash1); + error_code = cm_verify_step_uarch(nullptr, &hash0, filename1.c_str(), 1, &hash1); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); // step 2 - error_code = cm_log_step_uarch(_machine, _log_type, &_access_log); + const auto filename2 = make_log_filename(); + error_code = cm_log_step_uarch(_machine, 1, filename2.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); - // get hash after step error_code = cm_get_root_hash(_machine, &hash2); BOOST_REQUIRE_EQUAL(error_code, CM_ERROR_OK); - // verify - error_code = cm_verify_step_uarch(nullptr, &hash1, _access_log, &hash2); + error_code = cm_verify_step_uarch(nullptr, &hash1, filename2.c_str(), 1, &hash2); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); // step 3 - error_code = cm_log_step_uarch(_machine, _log_type, &_access_log); + const auto filename3 = make_log_filename(); + error_code = cm_log_step_uarch(_machine, 1, filename3.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); - // get hash after step error_code = cm_get_root_hash(_machine, &hash3); BOOST_REQUIRE_EQUAL(error_code, CM_ERROR_OK); - // verify - error_code = 
cm_verify_step_uarch(nullptr, &hash2, _access_log, &hash3); + error_code = cm_verify_step_uarch(nullptr, &hash2, filename3.c_str(), 1, &hash3); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); + // step 4 - error_code = cm_log_step_uarch(_machine, _log_type, &_access_log); + const auto filename4 = make_log_filename(); + error_code = cm_log_step_uarch(_machine, 1, filename4.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); - // get hash after step error_code = cm_get_root_hash(_machine, &hash4); BOOST_REQUIRE_EQUAL(error_code, CM_ERROR_OK); - // verify - error_code = cm_verify_step_uarch(_machine, &hash3, _access_log, &hash4); + error_code = cm_verify_step_uarch(_machine, &hash3, filename4.c_str(), 1, &hash4); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); // at micro cycle 4 @@ -1287,7 +1305,7 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(log_step_uarch_until_halt, access_log_machine_fix BOOST_REQUIRE_EQUAL(halt, 1); } -BOOST_FIXTURE_TEST_CASE_NOLINT(step_complex_test, access_log_machine_fixture) { +BOOST_FIXTURE_TEST_CASE_NOLINT(step_complex_test, step_log_machine_fixture) { cm_hash hash0; cm_hash hash1; @@ -1295,7 +1313,8 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(step_complex_test, access_log_machine_fixture) { BOOST_REQUIRE_EQUAL(error_code, CM_ERROR_OK); BOOST_REQUIRE_EQUAL(std::string(cm_get_last_error_message()), std::string("")); - error_code = cm_log_step_uarch(_machine, _log_type, &_access_log); + const auto filename = make_log_filename(); + error_code = cm_log_step_uarch(_machine, 1, filename.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); BOOST_CHECK_EQUAL(std::string(""), std::string(cm_get_last_error_message())); @@ -1303,14 +1322,14 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(step_complex_test, access_log_machine_fixture) { BOOST_REQUIRE_EQUAL(error_code, CM_ERROR_OK); BOOST_REQUIRE_EQUAL(std::string(cm_get_last_error_message()), std::string("")); - error_code = cm_verify_step_uarch(_machine, &hash0, _access_log, &hash1); + error_code = 
cm_verify_step_uarch(_machine, &hash0, filename.c_str(), 1, &hash1); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); BOOST_CHECK_EQUAL(std::string(""), std::string(cm_get_last_error_message())); } -BOOST_FIXTURE_TEST_CASE_NOLINT(step_hash_test, access_log_machine_fixture) { - - cm_error error_code = cm_log_step_uarch(_machine, _log_type, &_access_log); +BOOST_FIXTURE_TEST_CASE_NOLINT(step_hash_test, step_log_machine_fixture) { + const auto filename = make_log_filename(); + cm_error error_code = cm_log_step_uarch(_machine, 1, filename.c_str(), nullptr); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); BOOST_CHECK_EQUAL(std::string(""), std::string(cm_get_last_error_message())); @@ -1398,7 +1417,7 @@ BOOST_AUTO_TEST_CASE_NOLINT(machine_run_uarch_null_machine_test) { BOOST_REQUIRE_EQUAL(error_code, CM_ERROR_INVALID_ARGUMENT); } -BOOST_FIXTURE_TEST_CASE_NOLINT(machine_run_uarch_advance_one_cycle, access_log_machine_fixture) { +BOOST_FIXTURE_TEST_CASE_NOLINT(machine_run_uarch_advance_one_cycle, step_log_machine_fixture) { // ensure that uarch cycle is 0 uint64_t cycle{}; @@ -1421,7 +1440,7 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(machine_run_uarch_advance_one_cycle, access_log_m BOOST_REQUIRE_EQUAL(cycle, 1); } -BOOST_FIXTURE_TEST_CASE_NOLINT(machine_run_uarch_advance_until_halt, access_log_machine_fixture) { +BOOST_FIXTURE_TEST_CASE_NOLINT(machine_run_uarch_advance_until_halt, step_log_machine_fixture) { // ensure that uarch cycle is 0 uint64_t cycle{}; cm_error error_code = cm_read_reg(_machine, CM_REG_UARCH_CYCLE, &cycle); @@ -1733,4 +1752,13 @@ BOOST_AUTO_TEST_CASE_NOLINT(uarch_solidity_compatibility_layer) { BOOST_CHECK_EQUAL(int8ToUint64(int8(127)), 127); BOOST_CHECK_EQUAL(int8ToUint64(int8(-128)), 0xffffffffffffff80ULL); } + +BOOST_AUTO_TEST_CASE_NOLINT(pretty_print_step_uarch_failure_clears_output_test) { + // On failure the documented contract is *printout == nullptr; a stale pointer would let the caller + // consume previous text as if it came from the failed log. 
+ const char *printout = "stale"; + const cm_error error_code = cm_pretty_print_step_uarch("/no/such/uarch.log", &printout); + BOOST_CHECK_NE(error_code, CM_ERROR_OK); + BOOST_CHECK(printout == nullptr); +} // NOLINTEND(cppcoreguidelines-avoid-do-while,cppcoreguidelines-non-private-member-variables-in-classes) diff --git a/tests/scripts/collect-uarch-test-logs.sh b/tests/scripts/collect-uarch-test-logs.sh deleted file mode 100755 index 4fe554e1f..000000000 --- a/tests/scripts/collect-uarch-test-logs.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Copyright Cartesi and individual authors (see AUTHORS) -# SPDX-License-Identifier: LGPL-3.0-or-later -# -# This program is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) any -# later version. -# -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License along -# with this program (see COPYING). If not, see . 
-# -set -e -mkdir -m 755 -p /tmp/uarch-riscv-tests-json-logs -uarch-riscv-tests --output-dir=/tmp/uarch-riscv-tests-json-logs --create-reset-uarch-log --create-send-cmio-response-log --jobs=$(nproc) json-step-logs -tar -czf uarch-riscv-tests-json-logs.tar.gz -C /tmp uarch-riscv-tests-json-logs diff --git a/tests/uarch/Makefile b/tests/uarch/Makefile index 04079ab09..e4ce44bd5 100644 --- a/tests/uarch/Makefile +++ b/tests/uarch/Makefile @@ -26,7 +26,7 @@ RISCV_OBJDUMP ?= $(RISCV_PREFIX)objdump --disassemble-all --disassemble-zeroes - RISCV_OBJCOPY ?= $(RISCV_PREFIX)objcopy -S -O binary # Tests provided by us -TESTS = fence ebreak ecall-putchar ecall-unsupported ecall-mark-page-dirty ecall-write-tlb +TESTS = fence ebreak ecall-putchar ecall-unsupported ecall-removed-mark-page-dirty ecall-write-tlb SRC_DIR = $(abspath .) BUILDDIR ?= $(abspath ./build) TEST_NAMES = $(addprefix rv64ui-uarch-, $(TESTS)) diff --git a/tests/uarch/ecall-mark-page-dirty.S b/tests/uarch/ecall-removed-mark-page-dirty.S similarity index 83% rename from tests/uarch/ecall-mark-page-dirty.S rename to tests/uarch/ecall-removed-mark-page-dirty.S index 95f95fa22..300c2c3db 100644 --- a/tests/uarch/ecall-mark-page-dirty.S +++ b/tests/uarch/ecall-removed-mark-page-dirty.S @@ -21,12 +21,10 @@ RVTEST_RV64U RVTEST_CODE_BEGIN - li a7, UARCH_ECALL_FN_MARK_DIRTY_PAGE_DEF - li a0, 0 // physical address in page to be marked dirty - li a1, 0 // index of PMA where page falls + li a7, 3 // function code 3 was mark_dirty_page, now removed and unsupported ecall -RVTEST_PASS +RVTEST_FAIL // the ecall above must fail, so execution should never reach this point RVTEST_CODE_END diff --git a/tests/uarch/rv64ui-uarch-catalog.json b/tests/uarch/rv64ui-uarch-catalog.json index 716d33797..b94403318 100644 --- a/tests/uarch/rv64ui-uarch-catalog.json +++ b/tests/uarch/rv64ui-uarch-catalog.json @@ -51,7 +51,6 @@ { "path": "rv64ui-uarch-xori.bin", "cycle": 178}, { "path": "rv64ui-uarch-fence.bin", "cycle": 13}, { "path": 
"rv64ui-uarch-ecall-putchar.bin", "cycle": 15}, - { "path": "rv64ui-uarch-ecall-mark-page-dirty.bin", "cycle": 15}, { "path": "rv64ui-uarch-ecall-write-tlb.bin", "cycle": 46} ] diff --git a/uarch/uarch-bridge-state-access.hpp b/uarch/uarch-bridge-state-access.hpp index 1a3c6a889..8eee1e76d 100644 --- a/uarch/uarch-bridge-state-access.hpp +++ b/uarch/uarch-bridge-state-access.hpp @@ -196,11 +196,6 @@ class uarch_bridge_state_access : return false; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - void do_mark_dirty_page(uint64_t paddr, uint64_t pma_index) const { - ua_mark_dirty_page_ECALL(paddr, pma_index); - } - constexpr const char *do_get_name() const { // NOLINT(readability-convert-member-functions-to-static) return "uarch_bridge_state_access"; } diff --git a/uarch/uarch-ecall.c b/uarch/uarch-ecall.c index 9b85b739d..15f0085c5 100644 --- a/uarch/uarch-ecall.c +++ b/uarch/uarch-ecall.c @@ -41,18 +41,6 @@ void ua_putchar_ECALL(uint8_t c) { ); } -void ua_mark_dirty_page_ECALL(uint64_t paddr, uint64_t pma_index) { - // NOLINTNEXTLINE(hicpp-no-assembler) - asm volatile("mv a7, %0\n" - "mv a0, %1\n" - "mv a1, %2\n" - "ecall\n" - : // no output - : "r"(UARCH_ECALL_FN_MARK_DIRTY_PAGE_DEF), "r"(paddr), "r"(pma_index) - : "a7", "a0", "a1" // clobbered registers - ); -} - void ua_write_tlb_ECALL(uint64_t use, uint64_t slot_index, uint64_t vaddr_page, uint64_t vp_offset, uint64_t pma_index) { // NOLINTNEXTLINE(hicpp-no-assembler) diff --git a/uarch/uarch-ecall.h b/uarch/uarch-ecall.h index 08bdc27af..0e161a021 100644 --- a/uarch/uarch-ecall.h +++ b/uarch/uarch-ecall.h @@ -26,7 +26,6 @@ extern "C" { void ua_halt_ECALL(); void ua_putchar_ECALL(uint8_t c); -void ua_mark_dirty_page_ECALL(uint64_t paddr, uint64_t pma_index); void ua_write_tlb_ECALL(uint64_t use, uint64_t slot_index, uint64_t vaddr_page, uint64_t vp_offset, uint64_t pma_index); #ifdef __cplusplus