diff --git a/.dockerignore b/.dockerignore index d39f81a13..38ce5cb2f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -11,14 +11,15 @@ **/*.deb build +tests/build +tests/fuzz/corpus +tests/fuzz/corpus-config +tests/fuzz/seed-corpus third-party/downloads +third-party/riscv-arch-test/riscv-test-stats src/cartesi-jsonrpc-machine src/cartesi-hash-tree-hash - -doc/html -doc/api.md -doc/html/ -doc/xml/ +doc/ .git .github diff --git a/.gitattributes b/.gitattributes index f77a34ea7..5fe573d75 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ third-party/** linguist-vendored tests/** linguist-vendored +doc/recipes/rootfs-docs.ext2 filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 10eb81674..8e91ac06b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -685,9 +685,101 @@ jobs: run: | docker run --rm -t ${{ github.repository_owner }}/machine-emulator:sanitizer make sanitize=yes test-save-and-load test-machine test-lua test-jsonrpc test-c-api test-c-api-remote test-c-jsonrpc-api test-coverage-machine test-uarch-rv64ui test-uarch-interpreter test-coverage-uarch + docs: + name: Docs + needs: build + runs-on: ubuntu-latest-8-cores + steps: + - name: Checkout machine emulator source code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + submodules: recursive + lfs: true + + - name: Setup variables + run: echo MACHINE_EMULATOR_VERSION=`make version` >> $GITHUB_ENV + + - name: Install e2tools + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends e2tools + + - name: Check rootfs-docs.ext2 is up to date + run: make -C doc check-rootfs-docs + + - name: Ensure doc/recipes/rootfs-docs.licenses.md is tracked + run: | + if ! git ls-files --error-unmatch doc/recipes/rootfs-docs.licenses.md >/dev/null 2>&1; then + echo "::error::doc/recipes/rootfs-docs.licenses.md is not tracked in git. 
The sync check requires a committed baseline to diff against. Generate it with 'make -C doc build-rootfs-docs-license' and commit the result." + exit 1 + fi + + - name: Build rootfs-docs license report + run: make -C doc build-rootfs-docs-license + + - name: Check rootfs-docs.licenses.md matches committed version + run: | + if git diff --quiet doc/recipes/rootfs-docs.licenses.md; then + exit 0 + fi + git --no-pager diff doc/recipes/rootfs-docs.licenses.md + echo "::error::doc/recipes/rootfs-docs.licenses.md is out of sync with the generator. The diff above is what 'make -C doc build-rootfs-docs-license' produces vs the committed file. Check if changes were intended and, if so, regenerate locally and commit the result." + exit 1 + + - name: Login to Docker Hub + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Depot CLI + uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1.7.1 + + - name: Build machine-emulator docker image + uses: depot/build-push-action@5f3b3c2e5a00f0093de47f657aeaefcedff27d18 # v1.17.0 + with: + file: Dockerfile + context: . + platforms: linux/amd64 + tags: cartesi/machine-emulator:devel + push: false + load: true + build-args: | + DEBUG=${{ (startsWith(github.ref, 'refs/tags/v') && 'no' || 'yes') }} + GIT_COMMIT=${{ github.sha }} + MACHINE_EMULATOR_VERSION=${{ env.MACHINE_EMULATOR_VERSION }} + project: ${{ vars.DEPOT_PROJECT }} + token: ${{ secrets.DEPOT_TOKEN }} + + - name: Ensure doc/README.md is tracked + run: | + if ! git ls-files --error-unmatch doc/README.md >/dev/null 2>&1; then + echo "::error::doc/README.md is not tracked in git. 
The sync check requires a committed baseline to diff against. Generate it with 'make -C doc README.md' and commit the result." + exit 1 + fi + + - name: Build documentation + run: make -C doc README.md + + - name: Check README.md matches committed version + run: | + if git diff --quiet doc/README.md; then + exit 0 + fi + git --no-pager diff doc/README.md + echo "::error::doc/README.md is out of sync with the generator. The diff above is what 'make -C doc README.md' produces vs the committed file. Check if changes were intended and, if so, regenerate locally and commit the result." + exit 1 + publish_artifacts: name: Publish artifacts - needs: [build, lint, coverage, sanitize, test_amd64, test_arm64, risc0] + needs: [build, lint, coverage, sanitize, test_amd64, test_arm64, risc0, docs] runs-on: ubuntu-latest steps: - name: Checkout emulator source code @@ -862,7 +954,7 @@ jobs: - name: Install rzup run: | - cargo install --git https://github.com/risc0/risc0 rzup + cargo install --locked --git https://github.com/risc0/risc0 rzup rzup install cargo-risczero 3.0.5 rzup install r0vm 3.0.5 rzup install cpp 2024.1.5 diff --git a/.gitignore b/.gitignore index e377ade0d..202e01405 100644 --- a/.gitignore +++ b/.gitignore @@ -18,10 +18,8 @@ src/cartesi-jsonrpc-machine src/cartesi-hash-tree-hash src/tests/test-machine-c-api -doc/html -doc/api.md -doc/html/ -doc/xml/ +doc/doxygen/html +doc/recipes/cache .clangd .venv diff --git a/CHANGELOG.md b/CHANGELOG.md index b01d548f3..a3f1bf836 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,16 +6,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ## Added -- Added `--nvram` command line option and `nvram_configs` machine configuration for UIO-backed memory ranges, exposed to the guest via `generic-uio` -- Added `label` field to flash drive and nvram configurations, propagated to the guest through DTB properties -- Added optional directory argument to `--dump-memory-ranges` to 
support read-only install locations +- Added a user manual under `doc/`, generated from a template by a docgen pipeline that executes and verifies every code snippet against the locally built emulator +- Added `--nvram` command line option and `nvram` machine configuration for UIO-backed memory ranges, exposed to the guest as `/dev/uio*` via `generic-uio` +- Added a `label` field to memory range configurations, exposed to the guest through a standard DTB `/aliases` node (`flashdriveN`, `nvramN`, and any user label) +- Added recording of a revert root hash as a logged input of `send_cmio_response`, with `read_revert_root_hash`/`write_revert_root_hash` accessors across all API layers +- Added reversion to the recorded revert root hash for rollup inputs that end rejected, when logging steps, logging uarch resets, verifying, and collecting root hashes +- Added emission of per-output proofs from `--cmio-advance-state` +- Added an optional user schema dictionary argument to `cartesi.tojson`/`cartesi.fromjson` to name binary and compound fields of caller-defined message types +- Added a `cartesi.hash-tree` Lua module for hash-tree slice/splice verification and building the output-hashes frontier behind the output proofs +- Added `get_address_name` to resolve a physical address to a descriptive name, across the C, Lua, and JSON-RPC APIs +- Added the ability for `--initial-hash` and `--final-hash` to write the hash to a file +- Added an optional directory argument to `--dump-memory-ranges` to support read-only install locations - Added decoding of RISC-V Zcb compressed instructions (required by kernels built with GCC 14) - Added fallback to `read_reg` in the GDB stub so `monitor reg <name>` works for any named register -- Added `CM_FLASH_DRIVE_MAX`, `CM_NVRAM_MAX`, and `CM_MEMORY_RANGE_LABEL_MAX` constants to the public C API +- Added `--bash-completion` to print a bash completion script for `cartesi-machine` +- Added public C API constants `CM_FLASH_DRIVE_MAX`, `CM_NVRAM_MAX`, 
`CM_MEMORY_RANGE_LABEL_MAX`, `CM_RTC_FREQ_DIV`, and `CM_CMIO_LOG2_MAX_OUTPUT_COUNT` +- Added the peripheral `CM_AR_*` address range constants, the `CM_PMA_*_DID` driver id constants, the HTIF device, command, shift, and mask constants, and the `CM_DTB_BOOTARGS_*` macros to the public C API - Added LuaCov-based coverage tracking for Lua code, integrated with the gcov report pipeline +- Added a JSON-RPC C API coverage suite and converted `test-cm-cli` and `test-evmu` to the lester spec format - Added `spec-cm-cli.lua` covering every command-line option of `cartesi-machine.lua` ## Fixed +- Fixed leaf size in `cartesi-hash-tree-hash`, which was 8 instead of 32 +- Fixed read-only flash drives not being mounted with `-o ro`, which trapped guest writes and panicked init +- Fixed missing `#address-cells` on the per-CPU `interrupt-controller` node in the DTB, silencing a `dtc` interrupt-provider lint warning - Fixed firmware reserved region in the DTB being too small for OpenSBI built with GCC 14 - Fixed missing validation of memory range labels and of `replace_memory_range` arguments - Fixed boolean merging in `merge_memory_range_opts` so options can be explicitly overridden to `false` @@ -23,15 +37,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed `dump_pmas()` still calling the removed `get_memory_ranges()` method - Fixed unanchored patterns matching `--quiet` and `--assert-rolling-template` - Fixed `help()` internally calling `os.exit()` +- Fixed a typo in the `cartesi-machine.lua` cmio handling ## Changed -- Renamed `--dump-memory-ranges` to `--dump-address-ranges` for consistency with `get_address_ranges()` +- Renamed the yield constants in `cm.h` and the Lua API from `CM_CMIO_YIELD_*` to `CM_HTIF_YIELD_*` (and the command suffix from `COMMAND` to `CMD`) +- Renamed the PMA "device id" to "driver id" across the public API (`CM_PMA_*_DID` constants, `driver_id` in `get_address_ranges`) +- Changed `get_address_ranges` to 
report per-range attributes (`is_memory`, `is_device`, `is_readable`, `is_writeable`, `is_executable`, `is_read_idempotent`, `is_write_idempotent`, and `driver_id`) +- Replaced the `--store-json-config`/`--load-json-config` options with a `format:` sub-option on `--store-config`/`--load-config`, defaulting to the format inferred from the filename extension +- Changed `--initial-proof`/`--final-proof` to default to Lua tables and accept `format:` and `label:` sub-options, where before they were dumped only as JSON +- Reworked command-line option parsing so compound options such as `--volume` and `--port-forward` take `key:value` sub-options, and short options take a space-separated value (`-u <value>` instead of `-u=<value>`) +- Changed memory ranges with an unset start to be placed past the end of RAM, rounded up to a power of two and aligned to their length, with flash drives and nvrams drawn from a shared pool +- Changed `log_send_cmio_response` and `verify_send_cmio_response` to treat invalid responses and advance-state responses delivered outside an rx-accepted manual yield as no-ops, while the live `send_cmio_response` still rejects them as errors +- Changed the uarch state-access layer to align misaligned accesses down to their natural size instead of rejecting them +- Changed JSON-RPC error logs to omit the Boost `source_location` suffix at non-debug levels - Renamed all C++ headers from `.h` to `.hpp`, and renamed `machine-c-api.{h,cpp}` to `cm.{h,cpp}` -- Bumped machine configuration archive version from 6 to 7 (for the new `nvram_configs` and `label` fields) +- Bumped machine configuration archive version from 6 to 7 (for the new `nvram` and `label` fields) - Moved the `/run/cartesi/memoryranges/` sysfs setup from the DTB init script into `cartesi-init` - Updated guest bootargs to bind `uio_pdrv_genirq` to generic-uio nodes - Bumped test `linux.bin` and `rootfs.ext2` images +## Removed +- Removed the `--store-json-config` and `--load-json-config` options (folded into 
the `format:` sub-option of `--store-config`/`--load-config`) +- Removed the `--replace-flash-drive` option, subsumed by `--replace-memory-range` +- Removed `CM_ERROR_REGEX_ERROR` (covered by `CM_ERROR_RUNTIME_ERROR`) and `CM_ERROR_SYSTEM_ERROR`, and renumbered the error enum +- Removed `machine_reg_get_name` in favor of `get_address_name` +- Removed `mark_dirty_page` from the uarch and state-access interfaces + ## [0.20.0] - 2026-04-09 ## Added - Added RISC0 zkVM integration for fraud proof verification, with C++, Rust, and Solidity implementations diff --git a/Dockerfile b/Dockerfile index 10cdd977a..bfb90eab0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,7 @@ -FROM debian:trixie-20250811 AS toolchain +ARG BUILD_BASE=debian:trixie-20250811 +ARG RUNTIME_BASE=debian:trixie-20250811-slim + +FROM $BUILD_BASE AS toolchain RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ @@ -56,15 +59,24 @@ FROM builder AS debian-packager RUN make install-uarch debian-package DESTDIR=$PWD/_install #################################################################################################### -FROM debian:trixie-20250811-slim +FROM $RUNTIME_BASE ARG TARGETARCH +ARG RUNTIME_BASE +LABEL io.cartesi.machine-emulator.base-image="$RUNTIME_BASE" COPY --from=debian-packager /usr/src/emulator/machine-emulator_${TARGETARCH}.deb machine-emulator.deb +COPY tests/dependencies tests/dependencies.sha256 /usr/share/cartesi-machine/ RUN apt-get update && \ - apt-get install -y ./machine-emulator.deb && \ + apt-get install -y gosu ./machine-emulator.deb && \ rm -rf /var/lib/apt/lists/* /var/cache/apt/* machine-emulator.deb +# Carried but dormant (no ENTRYPOINT set here, so production behavior is +# unchanged): lets a derived dev image (e.g. doc/) opt into running as the host +# user by pointing ENTRYPOINT at this script. Needs gosu, installed above. 
+COPY tools/docker-entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh + RUN groupadd --system --gid 102 cartesi && \ useradd --system --uid 102 --gid 102 --no-create-home --home /nonexistent --comment "cartesi user" --shell /bin/false cartesi diff --git a/Makefile b/Makefile index 389b5e91c..b19b4fe65 100644 --- a/Makefile +++ b/Makefile @@ -77,7 +77,7 @@ EMU_TO_BIN= src/cartesi-jsonrpc-machine src/cartesi-hash-tree-hash EMU_TO_LIB= src/$(LIBCARTESI_SO) src/$(LIBCARTESI_SO_JSONRPC) EMU_TO_LIB_A= src/libcartesi.a src/libcartesi_jsonrpc.a src/libluacartesi.a src/libluacartesi_jsonrpc.a EMU_LUA_TO_BIN= src/cartesi-machine.lua src/cartesi-machine-stored-hash.lua -EMU_TO_LUA_PATH= src/cartesi/util.lua src/cartesi/gdbstub.lua src/cartesi/evmu.lua +EMU_TO_LUA_PATH= src/cartesi/util.lua src/cartesi/gdbstub.lua src/cartesi/evmu.lua src/cartesi/bash.lua src/cartesi/hash-tree.lua EMU_TO_LUA_THIRD_PARTY_PATH= src/cartesi/third-party/bint.lua EMU_TO_LUA_CPATH= src/cartesi.so EMU_TO_LUA_CARTESI_CPATH= src/cartesi/jsonrpc.so @@ -105,6 +105,12 @@ export UARCH_DEFS TAG ?= devel DEBIAN_IMG ?= cartesi/machine-emulator:$(TAG).deb +# Base images for the emulator Dockerfile. Single source of truth: passed as +# build-args and stamped into the image as a label so downstream images (e.g. +# doc/) can reuse the exact same base without duplicating these values. 
+BUILD_BASE ?= debian:trixie-20250811 +RUNTIME_BASE ?= debian:trixie-20250811-slim + # Docker image platform BUILD_PLATFORM ?= @@ -247,11 +253,14 @@ test% coverage% build-tests%: build-tests-misc-with-builder-image: build-emulator-builder-image -lint check-format format check-format-lua check-lua format-lua: +lint check-format format: @$(MAKE) $@-src $@-tests +check-format-lua check-lua format-lua: + @$(MAKE) $@-src $@-tests $@-doc + lint-% check-format-% format-% check-format-lua-% check-lua-% format-lua-%: - @eval $$($(MAKE) -s --no-print-directory env); $(MAKE) -C $(if $(findstring -src,$@),src,tests) $(subst -src,,$(subst -tests,,$@)) + @eval $$($(MAKE) -s --no-print-directory env); $(MAKE) -C $(if $(findstring -doc,$@),doc,$(if $(findstring -src,$@),src,tests)) $(subst -doc,,$(subst -src,,$(subst -tests,,$@))) source-default: @eval $$($(MAKE) -s --no-print-directory env); $(MAKE) -C $(SRCDIR) @@ -269,13 +278,13 @@ $(SRCDIR)/interpret-jump-table.hpp: @eval $$($(MAKE) -s --no-print-directory env); $(MAKE) -C $(SRCDIR) interpret-jump-table.hpp build-emulator-builder-image: - docker build $(DOCKER_PLATFORM) --build-arg DEBUG=$(debug) --build-arg COVERAGE=$(coverage) --build-arg THREADS=$(threads) --build-arg SANITIZE=$(sanitize) --target builder -t cartesi/machine-emulator:builder -f Dockerfile . + docker build $(DOCKER_PLATFORM) --build-arg DEBUG=$(debug) --build-arg COVERAGE=$(coverage) --build-arg THREADS=$(threads) --build-arg SANITIZE=$(sanitize) --build-arg BUILD_BASE=$(BUILD_BASE) --target builder -t cartesi/machine-emulator:builder -f Dockerfile . build-emulator-toolchain-image build-toolchain: - docker build $(DOCKER_PLATFORM) --target toolchain -t cartesi/machine-emulator:toolchain -f Dockerfile . + docker build $(DOCKER_PLATFORM) --build-arg BUILD_BASE=$(BUILD_BASE) --target toolchain -t cartesi/machine-emulator:toolchain -f Dockerfile . 
build-emulator-image: - docker build $(DOCKER_PLATFORM) --build-arg DEBUG=$(debug) --build-arg COVERAGE=$(coverage) --build-arg THREADS=$(threads) --build-arg SANITIZE=$(sanitize) -t cartesi/machine-emulator:$(TAG) -f Dockerfile . + docker build $(DOCKER_PLATFORM) --build-arg DEBUG=$(debug) --build-arg COVERAGE=$(coverage) --build-arg THREADS=$(threads) --build-arg SANITIZE=$(sanitize) --build-arg BUILD_BASE=$(BUILD_BASE) --build-arg RUNTIME_BASE=$(RUNTIME_BASE) -t cartesi/machine-emulator:$(TAG) -f Dockerfile . build-emulator-tests-image: build-emulator-builder-image build-emulator-image docker build $(DOCKER_PLATFORM) --build-arg DEBUG=$(debug) --build-arg COVERAGE=$(coverage) --build-arg THREADS=$(threads) --build-arg SANITIZE=$(sanitize) --build-arg TAG=$(TAG) -t cartesi/machine-emulator:tests -f tests/Dockerfile . @@ -284,7 +293,7 @@ build-emulator-tests-builder-image: build-emulator-builder-image docker build $(DOCKER_PLATFORM) --target tests-builder --build-arg DEBUG=$(debug) --build-arg COVERAGE=$(coverage) --build-arg THREADS=$(threads) --build-arg SANITIZE=$(sanitize) --build-arg TAG=$(TAG) -t cartesi/machine-emulator:tests-builder -f tests/Dockerfile . build-debian-package: - docker build $(DOCKER_PLATFORM) --target debian-packager --build-arg DEBUG=$(debug) --build-arg COVERAGE=$(coverage) --build-arg THREADS=$(threads) --build-arg SANITIZE=$(sanitize) -t $(DEBIAN_IMG) -f Dockerfile . + docker build $(DOCKER_PLATFORM) --target debian-packager --build-arg DEBUG=$(debug) --build-arg COVERAGE=$(coverage) --build-arg THREADS=$(threads) --build-arg SANITIZE=$(sanitize) --build-arg BUILD_BASE=$(BUILD_BASE) -t $(DEBIAN_IMG) -f Dockerfile . build-tests-debian-packages: build-emulator-builder-image docker build $(DOCKER_PLATFORM) --target tests-debian-packager --build-arg TAG=$(TAG) -t cartesi/machine-emulator:tests-debian-packager -f tests/Dockerfile . 
diff --git a/doc/.dockerignore b/doc/.dockerignore new file mode 100644 index 000000000..ed6d64661 --- /dev/null +++ b/doc/.dockerignore @@ -0,0 +1,6 @@ +Makefile +README.md.template +doxygen +recipes/cache +recipes/*.ext2 +!recipes/rootfs-docs.ext2 diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 000000000..a2e2df65c --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,2 @@ +recipes/rootfs-docs.inputs.sha256 +recipes/rootfs-docs.inputs.sha256-have diff --git a/doc/.luacheckrc b/doc/.luacheckrc new file mode 100644 index 000000000..1d2477882 --- /dev/null +++ b/doc/.luacheckrc @@ -0,0 +1,16 @@ +-- alerts.lua and replace.lua are pandoc Lua filters: pandoc runs them inside +-- its own interpreter, which injects the `pandoc` module along with the +-- FORMAT and PANDOC_SCRIPT_FILE globals, and invokes the filter callbacks +-- (Div, Pandoc) that the filter defines at the top level. Declare that +-- environment so luacheck does not report the pandoc API as undefined or +-- non-standard globals. +-- +-- The recipe scripts under recipes/ are ordinary lua5.4 programs and are left +-- under luacheck's default configuration, so genuine stray globals there are +-- still caught. +local pandoc_filter = { + read_globals = { "FORMAT", "PANDOC_SCRIPT_FILE", "pandoc" }, + globals = { "Div", "Pandoc" }, +} +files["alerts.lua"] = pandoc_filter +files["replace.lua"] = pandoc_filter diff --git a/doc/Dockerfile b/doc/Dockerfile new file mode 100644 index 000000000..4b4eb1955 --- /dev/null +++ b/doc/Dockerfile @@ -0,0 +1,76 @@ +ARG TAG=0.20.0 +ARG GUEST_TOOLS_VERSION=v0.18.0-test4 +ARG GENEXT2FS_VERSION=v1.5.6 +# Base for the host-tool builder stages below. These stages compile binaries +# that are copied into the final emulator-based image, so the base must stay +# ABI-compatible with cartesi/machine-emulator:$TAG. It deliberately does NOT +# reference $TAG, so bumping the emulator image reuses the builders' cache. 
+# The Makefile overrides this with the emulator image's own base, read from its +# io.cartesi.machine-emulator.base-image label, so it tracks automatically if +# the emulator rebases. +ARG BUILDER_BASE=debian:trixie-20250811-slim + +FROM $BUILDER_BASE AS hex-builder +ARG GUEST_TOOLS_VERSION +USER root +RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ + apt-get install -y --no-install-recommends g++ wget ca-certificates && \ + wget -qO- https://github.com/cartesi/machine-guest-tools/archive/refs/tags/$GUEST_TOOLS_VERSION.tar.gz \ + | tar -xz -C /tmp --wildcards --strip-components=3 '*/sys-utils/hex/hex.cpp' && \ + g++ -O2 -o /usr/local/bin/hex /tmp/hex.cpp + +FROM $BUILDER_BASE AS xgenext2fs-builder +ARG GENEXT2FS_VERSION +USER root +RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ + apt-get install -y --no-install-recommends \ + automake autotools-dev build-essential libarchive-dev wget ca-certificates && \ + wget -qO- https://github.com/cartesi/genext2fs/archive/refs/tags/$GENEXT2FS_VERSION.tar.gz \ + | tar -xz -C /tmp && \ + cd /tmp/genext2fs-* && \ + ./autogen.sh && \ + ./configure --enable-libarchive --prefix=/usr/local && \ + make -j"$(nproc)" install + +FROM cartesi/machine-emulator:$TAG + +USER root + +RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ + apt-get install -y \ + make pandoc \ + bash-completion bc devio device-tree-compiler e2tools file graphviz jq libarchive13 lua-dkjson lua-socket net-tools wget && \ + rm -rf /var/lib/apt/lists/* + +# Snapshot cartesi-machine's bash completion into the canonical dir, then enable +# the framework loader by uncommenting the block Debian ships (disabled) in +# /etc/bash.bashrc. The package's /etc/profile.d snippet only covers login +# shells; the playground runs a non-login shell, which reads only bash.bashrc. +# With the loader active, the file lazy-loads on first `cartesi-machine `. 
+RUN cartesi-machine --bash-completion > /usr/share/bash-completion/completions/cartesi-machine && \ + sed -i '/^#if ! shopt -oq posix; then/,/^#fi$/ s/^#//' /etc/bash.bashrc + +COPY --from=hex-builder /usr/local/bin/hex /usr/local/bin/hex +COPY --from=xgenext2fs-builder /usr/local/bin/xgenext2fs /usr/local/bin/xgenext2fs + +RUN mkdir -p /tmp/cartesi-images/build/images && \ + cd /usr/share/cartesi-machine && \ + wget -nc -i dependencies -P /tmp/cartesi-images/build/images && \ + cd /tmp/cartesi-images && \ + cp /usr/share/cartesi-machine/dependencies.sha256 . && \ + sha256sum --check dependencies.sha256 && \ + cd /usr/share/cartesi-machine/images && \ + mv /tmp/cartesi-images/build/images/linux-*.bin . && \ + ln -s "$(ls linux-*.bin)" linux.bin && \ + rm -rf /tmp/cartesi-images + +COPY recipes/rootfs-docs.ext2 /usr/share/cartesi-machine/images/rootfs-docs.ext2 +RUN ln -s rootfs-docs.ext2 /usr/share/cartesi-machine/images/rootfs.ext2 + +ENV DEV_ENV_HAS_DOCGEN=yes +ENV PATH=/work/recipes:${PATH} + +# Run as the host user: the entrypoint (carried dormant by the emulator image) +# reads the USER/UID/GID/GROUP env that run-docs-image / docs-image-exec pass +# and gosu-drops to it, so files written under /work are owned by the host user. +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 000000000..778bbaecb --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,236 @@ +# Docker image tag — must match the tag used by the root Makefile's +# build-emulator-image target so the docs image layers on top of an +# emulator built from the local source (kernel/rootfs images in +# tests/dependencies are built against the unreleased branch and may +# require emulator features not in any released base image). 
+TAG ?= devel + +DOCS_IMAGE = cartesi/machine-emulator-docs + +# Docker image platform +BUILD_PLATFORM ?= + +ifneq ($(BUILD_PLATFORM),) +DOCKER_PLATFORM=--platform $(BUILD_PLATFORM) +endif + +DEV_ENV_HAS_DOCGEN ?= no +RECIPES_DIR ?= $(CURDIR)/recipes +REPLACE_CACHE_DIR := $(RECIPES_DIR)/cache +export RECIPES_DIR REPLACE_CACHE_DIR +export LUA_PATH := $(CURDIR)/?.lua;$(RECIPES_DIR)/?.lua;$(LUA_PATH);; + +STYLUA=stylua +STYLUA_FLAGS=--indent-type Spaces --respect-ignores + +.PHONY: build-docs-image clean format-lua check-format-lua check-lua +.DELETE_ON_ERROR: + +# Lua linting/formatting for the recipe snippets and pandoc filters. The +# generated cache (REPLACE_CACHE_DIR) holds extracted block bodies that are +# rewritten on every render, so it is excluded from both tools. +format-lua: + @$(STYLUA) $(STYLUA_FLAGS) . + +check-format-lua: + @$(STYLUA) $(STYLUA_FLAGS) --check . + +check-lua: + luacheck . --exclude-files '$(REPLACE_CACHE_DIR)/**' + +ROOTFS_DOCS_INPUTS := \ + $(RECIPES_DIR)/Dockerfile.rootfs-docs \ + $(RECIPES_DIR)/build-rootfs-docs.sh \ + $(RECIPES_DIR)/puppet.c \ + $(wildcard $(RECIPES_DIR)/hello/*) + +ROOTFS_DOCS_EXT2 := $(RECIPES_DIR)/rootfs-docs.ext2 +ROOTFS_DOCS_KEY := $(RECIPES_DIR)/rootfs-docs.inputs.sha256 + +# Fingerprint of the recipe inputs. Generated and gitignored, embedded into the +# image at build time and re-derived here to spot a stale committed image +# without rebuilding. Both sides use the same input list so they cannot drift. +$(ROOTFS_DOCS_KEY): $(ROOTFS_DOCS_INPUTS) + cat $(sort $(ROOTFS_DOCS_INPUTS)) | sha256sum | head -c 64 > $@ + +# Build the image. build-rootfs-docs.sh embeds $(ROOTFS_DOCS_KEY) into it. +$(ROOTFS_DOCS_EXT2): $(ROOTFS_DOCS_INPUTS) $(ROOTFS_DOCS_KEY) + cd $(RECIPES_DIR) && ./build-rootfs-docs.sh + +# Convenience alias, since the real target is an absolute path. +.PHONY: build-rootfs-docs +build-rootfs-docs: $(ROOTFS_DOCS_EXT2) + +# Verify the committed image matches the current recipe inputs. 
No rebuild and no +# network, so CI runs it after a git lfs pull. A missing or stale image is a hard +# error. e2cp copies the embedded fingerprint out under a -have suffix to compare +# against the freshly derived one. +.PHONY: check-rootfs-docs +check-rootfs-docs: $(ROOTFS_DOCS_KEY) + @test -s $(ROOTFS_DOCS_EXT2) || { \ + echo "ERROR: rootfs-docs.ext2 missing, run 'git lfs pull'"; exit 1; } + @e2cp $(ROOTFS_DOCS_EXT2):/var/log/rootfs-docs.inputs.sha256 $(ROOTFS_DOCS_KEY)-have + @cmp -s $(ROOTFS_DOCS_KEY) $(ROOTFS_DOCS_KEY)-have || { \ + echo "ERROR: rootfs-docs.ext2 is stale (recipe inputs changed)."; \ + echo "Run 'make -C doc build-rootfs-docs' and commit it."; \ + exit 1; \ + } + +ROOTFS_DOCS_LICENSES := $(RECIPES_DIR)/rootfs-docs.licenses.md + +# Package and license report for the committed image. It is derived from the +# ext2 itself rather than from a rebuild, and committed alongside it so a +# checkout carries the license information with the binary. Like the targets +# above, freshness is judged by content rather than timestamps, so this always +# regenerates the report. CI then diffs it against the committed version, as +# it does for README.md. +.PHONY: build-rootfs-docs-license +build-rootfs-docs-license: | ensure-rootfs-docs + cd $(RECIPES_DIR) && ./build-rootfs-docs-license.sh > $(ROOTFS_DOCS_LICENSES) + +# Local convenience used before the docs image is built. Fetch the image from +# LFS if absent or still an unsmudged pointer, rebuild it locally if the embedded +# fingerprint no longer matches the recipe inputs. +.PHONY: ensure-rootfs-docs +ensure-rootfs-docs: + @if [ ! 
-s $(ROOTFS_DOCS_EXT2) ] || \ + git lfs pointer --check --file $(ROOTFS_DOCS_EXT2) >/dev/null 2>&1; then \ + echo "rootfs-docs.ext2 not present, fetching from git lfs..."; \ + git lfs pull --include "$(ROOTFS_DOCS_EXT2)" || true; \ + fi + @if $(MAKE) -s check-rootfs-docs >/dev/null 2>&1; then \ + echo "rootfs-docs.ext2 is up to date."; \ + else \ + echo "rootfs-docs.ext2 missing or stale, rebuilding locally..."; \ + rm -f $(ROOTFS_DOCS_EXT2); \ + $(MAKE) $(ROOTFS_DOCS_EXT2); \ + fi + +check-docs-image: + @if docker images $(DOCKER_PLATFORM) -q $(DOCS_IMAGE):$(TAG) 2>/dev/null | grep -q .; then \ + echo "Docker image $(DOCS_IMAGE):$(TAG) exists"; \ + else \ + echo "Docker image $(DOCS_IMAGE):$(TAG) does not exist. Creating:"; \ + $(MAKE) build-docs-image; \ + fi + +docs-image-exec: check-docs-image + docker run --hostname playground --rm \ + -e USER=$$(id -u -n) \ + -e GROUP=$$(id -g -n) \ + -e UID=$$(id -u) \ + -e GID=$$(id -g) \ + -v $(CURDIR):/work \ + -w /work \ + $(DOCS_IMAGE):$(TAG) /bin/bash -c "$(CONTAINER_COMMAND)" + +run-docs-image: check-docs-image + docker run \ + --hostname playground \ + --name playground \ + --rm \ + -e USER=$$(id -u -n) \ + -e GROUP=$$(id -g -n) \ + -e UID=$$(id -u) \ + -e GID=$$(id -g) \ + -v $(CURDIR):/work \ + -w /work \ + -it \ + $(DOCS_IMAGE):$(TAG) \ + /bin/bash + +build-docs-image: | ensure-rootfs-docs + @if docker image inspect cartesi/machine-emulator:$(TAG) >/dev/null 2>&1; then \ + echo "Docker image cartesi/machine-emulator:$(TAG) exists, reusing"; \ + else \ + $(MAKE) -C .. build-emulator-image TAG=$(TAG); \ + fi + base=$$(docker image inspect --format '{{ index .Config.Labels "io.cartesi.machine-emulator.base-image" }}' cartesi/machine-emulator:$(TAG)); \ + docker build $(DOCKER_PLATFORM) --build-arg TAG=$(TAG) $${base:+--build-arg BUILDER_BASE=$$base} -t $(DOCS_IMAGE):$(TAG) . 
+ +$(REPLACE_CACHE_DIR): + mkdir -p $@ + +ifeq ($(DEV_ENV_HAS_DOCGEN),yes) + +# Dry-run: scan the template, populate cache directories, and emit one make +# rule per annotated block into template.d. The rendered output IS the .d +# file (pandoc emits the RawBlock that replace.lua builds). +template.d: README.md.template replace.lua | $(REPLACE_CACHE_DIR) + REPLACE_CACHE_DIR=$(REPLACE_CACHE_DIR) RECIPES_DIR=$(RECIPES_DIR) \ + pandoc -f markdown -t plain \ + -M write-user-dependencies=README.md \ + -M docs_image=$(DOCS_IMAGE):$(TAG) \ + --lua-filter replace.lua README.md.template -o $@ + +ifeq (,$(filter clean,$(MAKECMDGOALS))) +-include template.d +endif + +# Diagrams rendered from committed sources under images/. Generated into the same +# dir and committed, like README.md itself. Regenerated only when a source changes. +# Order-only prereq of README.md below: a render always brings them up to date, +# without forcing a re-render when only the image bytes change (the template +# references the path, not the contents). +# Diagrams are committed SVGs under images/, like README.md itself, with no background of +# their own so they inherit the page. state-tree's ink colors depend on the theme, so it is +# built in a light and a dark variant that the template selects with a <picture> element; +# outputs-tree reads on both themes, so a single variant serves both. +DOC_DIAGRAMS := \ + images/outputs-tree.svg \ + images/state-tree-light.svg images/state-tree-dark.svg + +# outputs-tree pins its node positions for a perfectly symmetric binary tree (dot's own +# layout skews internal nodes), so it renders with neato -n, which uses the given positions +# and runs no layout. Held nodes are filled and the unmaterialized interiors are transparent +# outlines, so its colors read on both themes from one variant. +images/outputs-tree.svg: images/outputs-tree.dot + neato -n -Tsvg $< -o $@ + +# state-tree draws an actual machine's state hash-tree over the address space. 
It +# instantiates a machine and reads its memory ranges, so it is a Lua program that emits the +# SVG directly rather than a Graphviz source. Its first argument selects the palette. +# The script's output goes to a temporary file that is moved into place only on +# success: redirecting straight to $@ truncates the committed SVG before lua5.4 +# runs, so a failing script would leave an empty file that make treats as fresh. +images/state-tree-light.svg: images/state-tree.lua + lua5.4 $< light > $@.tmp || { rm -f $@.tmp; exit 1; } + mv $@.tmp $@ +images/state-tree-dark.svg: images/state-tree.lua + lua5.4 $< dark > $@.tmp || { rm -f $@.tmp; exit 1; } + mv $@.tmp $@ + +# Real run: render the document. template.d adds per-cache-file prereqs to +# README.md, so editing a recipe propagates here naturally. check-rootfs-docs is +# order-only so a recipe-input change that leaves the committed image stale fails +# the render here (as in CI) without forcing a re-render on every invocation. +README.md: README.md.template replace.lua alerts.lua github.template.md template.d | check-rootfs-docs $(DOC_DIAGRAMS) + REPLACE_CACHE_DIR=$(REPLACE_CACHE_DIR) RECIPES_DIR=$(RECIPES_DIR) \ + pandoc -f markdown -s -t gfm --toc \ + -M docs_image=$(DOCS_IMAGE):$(TAG) \ + --lua-filter replace.lua --lua-filter alerts.lua \ + --template=github.template.md \ + README.md.template -o $@ + +# HTML rendering of the same template. Listing README.md as a prerequisite makes +# the Markdown render, and its order-only prerequisites, run first. +README.html: README.md README.md.template replace.lua alerts.lua github.template.html template.d + REPLACE_CACHE_DIR=$(REPLACE_CACHE_DIR) RECIPES_DIR=$(RECIPES_DIR) \ + pandoc -f markdown+emoji -s -t html5 --toc --mathjax \ + -M docs_image=$(DOCS_IMAGE):$(TAG) \ + --lua-filter replace.lua --lua-filter alerts.lua \ + --template=github.template.html \ + README.md.template -o $@ + +else + +# Without docgen tools on the host, delegate the whole build to the docs +# container. Inside, DEV_ENV_HAS_DOCGEN=yes activates the rules above and +# evaluates staleness against template.d.
+.PHONY: README.md README.html +# JOBS, when set, picks the parallelism of the in-container make; otherwise it +# defaults to the nproc reported from inside the docs image. +README.md: | check-docs-image + @jobs=$(if $(JOBS),$(JOBS),$$(docker run --rm $(DOCS_IMAGE):$(TAG) nproc)); \ + $(MAKE) docs-image-exec CONTAINER_COMMAND="make -j$$jobs README.md DEV_ENV_HAS_DOCGEN=yes" + +README.html: README.md + $(MAKE) docs-image-exec CONTAINER_COMMAND="make README.html DEV_ENV_HAS_DOCGEN=yes" +endif + +# Remove the rendered documents (README.md and README.html), template.d, the +# rootfs-docs tarball and key files, and the replace cache directory. +clean: + rm -f README.md README.html template.d + rm -f $(RECIPES_DIR)/rootfs-docs.tar + rm -f $(ROOTFS_DOCS_KEY) $(ROOTFS_DOCS_KEY)-have + rm -rf $(REPLACE_CACHE_DIR) diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 000000000..b530edcd2 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,8467 @@ +# Table of contents + +- [Introduction](#introduction) + - [Scalability](#scalability) + - [Productivity](#productivity) + - [What’s in a machine](#whats-in-a-machine) + - [Rolling Cartesi Machines and Cartesi + Rollups](#rolling-cartesi-machines-and-cartesi-rollups) + - [Documentation](#documentation) +- [The host perspective](#the-host-perspective) + - [Machine playground](#machine-playground) + - [Command-line interface](#command-line-interface) + - [Initialization](#initialization) + - [Interactive sessions](#interactive-sessions) + - [Flash drives](#flash-drives) + - [Persistent flash drives and + NVRAMs](#persistent-flash-drives-and-nvrams) + - [Limiting execution](#limiting-execution) + - [State hashes](#state-hashes) + - [Persistent Cartesi Machines](#persistent-cartesi-machines) + - [Running as root](#running-as-root) + - [Cartesi Machine templates](#cartesi-machine-templates) + - [State value proofs](#state-value-proofs) + - [Remote Cartesi Machines](#remote-cartesi-machines) + - [Rolling Cartesi Machines](#rolling-cartesi-machines) + - [Rolling Cartesi Machine + templates](#rolling-cartesi-machine-templates) + - [Additional options](#additional-options) + - [Lua interface](#lua-interface) + - [Instantiation by configuration](#instantiation-by-configuration) + - [Default configuration](#default-configuration)
+ - [Generating configurations](#generating-configurations) + - [Additional sample + configurations](#additional-sample-configurations) + - [Loading and running machines](#loading-and-running-machines) + - [Instantiation from persistent + state](#instantiation-from-persistent-state) + - [Limiting execution](#limiting-execution-1) + - [Progress feedback](#progress-feedback) + - [Cartesi Machine templates](#cartesi-machine-templates-1) + - [State hashes](#state-hashes-1) + - [External state access](#external-state-access) + - [State value proofs](#state-value-proofs-1) + - [Remote Cartesi Machines](#remote-cartesi-machines-1) + - [Rolling Cartesi Machines](#rolling-cartesi-machines-1) + - [Output proofs](#output-proofs) + - [State-transition proofs](#state-transition-proofs) +- [The guest perspective](#the-guest-perspective) + - [Linux environment](#linux-environment) + - [Building a custom root + file-system](#building-a-custom-root-file-system) + - [Guest “Hello world!”](#guest-hello-world) + - [Flash drives and NVRAMs](#flash-drives-and-nvrams) + - [Initialization](#initialization-1) + - [Communication between guest and + host](#communication-between-guest-and-host) + - [System architecture](#system-architecture) + - [The main processor](#the-main-processor) + - [The microarchitecture](#the-microarchitecture) + - [The board](#the-board) + - [Linux setup](#linux-setup) +- [The blockchain perspective](#the-blockchain-perspective) + - [Hash-view of state](#hash-view-of-state) + - [Slicing and splicing](#slicing-and-splicing) + - [Template instantiation](#template-instantiation) + - [Result extraction](#result-extraction) + - [The output hashes tree](#the-output-hashes-tree) + - [Output verification](#output-verification) + - [Verification game](#verification-game) + - [Settling a dispute](#settling-a-dispute) + - [One bisection level](#one-bisection-level) + - [Verifying the state transition](#verifying-the-state-transition) + - [Verifying the 
result](#verifying-the-result) + - [Running the game](#running-the-game) + +# Introduction + +The Cartesi Machine is Cartesi’s solution for verifiable computation. It +was designed to bring mainstream scalability to decentralized +applications and mainstream productivity to their developers. + +## Scalability + +Applications running exclusively on smart contracts face severe +constraints on the amount of data they can manipulate and on the +complexity of computations they can perform. These limitations manifest +themselves as exorbitant transaction costs and, even if such costs could +somehow be overcome, as extremely long computation times. + +In comparison, applications running inside Cartesi Machines can process +practically unlimited amounts of data, and at a pace orders of magnitude +faster. This is possible because Cartesi Machines run off-chain, free of +the overhead imposed by the consensus mechanisms used by blockchains. + +In a typical scenario, one of the parties involved in an application +will execute the Cartesi Machine off-chain and report its results to the +blockchain. Different parties do not need to trust each other because +the Cartesi platform includes an automatic dispute mechanism for Cartesi +Machines. All interested parties repeat the computation off-chain and, +if their results do not agree, they enter into a dispute, which the +mechanism guarantees to be always won by an honest party against any +number of dishonest parties. + +To enable this dispute mechanism, Cartesi Machines are executed inside a +special emulator that has three unique properties: + +- Cartesi Machines are *self-contained* — They run in isolation from any + external influence on the computation; +- Cartesi Machines are *reproducible* — Two parties performing the same + computation always obtain exactly the same results; +- Cartesi Machines are *transparent* — They expose their entire state + for external inspection.
+ +From the point of view of the blockchain, the disputes require only a +tiny fraction of the amount of computation performed by the Cartesi +Machine. Dispute resolution thus becomes an ordinary task and dishonest +parties are generally expected to be exposed, which discourages the +posting of incorrect results and further increases the efficiency of the +platform. + +Cartesi Machines allow decentralized applications to take advantage of +vastly increased computing capabilities off-chain, while enjoying the +same security guarantees offered by code that runs natively as smart +contracts. This is what Cartesi means by scalability. + +## Productivity + +Scalability is not the only impediment to widespread blockchain +adoption. Another serious limiting factor is the reduced developer +productivity. + +Modern software development involves the combination of dozens of +off-the-shelf software components. Creating these components took the +concerted effort of an active worldwide community over the course of +several decades. They have all been developed and tested using +well-established toolchains (programming languages, compilers, linkers, +profilers, debuggers, etc.), and rely on multiple services provided by +modern operating systems (memory management, multi-tasking, file +systems, networking, etc.). + +Smart contracts are developed using ad-hoc toolchains, and run directly +on top of custom virtual machines, without the support of an underlying +operating system. This arrangement deprives developers of the tools of +their trade, severely reduces their expressive power, and consequently +decimates their productivity. + +In contrast, Cartesi Machines are based on a proven platform: +[RISC-V](https://riscv.org/). RISC-V was born of research in academia at +UC Berkeley. It is now maintained by its own independent foundation. It +is important to keep in mind that, unlike many of its academic +counterparts, RISC-V is not a toy architecture. 
It is suitable for +direct native hardware implementation, which is indeed currently +commercialized by a large (and ever-increasing) number of +[vendors](https://en.wikipedia.org/wiki/RISC-V#Implementations). This +means that, in the future, Cartesi will not be limited to emulation or +binary translation off-chain. The RISC-V platform is supported by a +vibrant community of developers. Their efforts have produced an +extensive software infrastructure, most notably ports of the Linux +Operating System and the GNU toolchain. + +By moving key parts of their application logic to run inside Cartesi +Machines, but on top of the Linux Operating System, developers are +isolated not only from the limitations and idiosyncrasies of specific +blockchains, but also from irrelevant details of the Cartesi Machine +architecture itself. They regain access to all the tools they have come +to rely on when writing applications. + +This is Cartesi’s contribution to empowering application developers to +express their creativity unimpeded, and to boost their productivity. + +## What’s in a machine + +The key components of a Cartesi Machine are its main processor and a +board. The processor that performs the computations, executing the +traditional fetch-execute loop while maintaining a variety of registers, +implements a generous set of RISC-V extensions. The board defines the +surrounding environment with an assortment of memories (RAM, flash +drives, NVRAMs etc) and a number of devices. Memories and devices are +mapped to the 64-bit physical address space of the Cartesi Machine. The +amount of RAM, as well as the number, length, and position of the flash +drives and NVRAMs in the address space can be chosen according to the +needs of each particular application. The Cartesi Machine emulator is a +program that carefully implements the Cartesi Machine architecture so +that its execution is reproducible in production. 
During development, a +variety of convenient devices can be added to the Cartesi Machine that +make prototyping more ergonomic. The emulator can be built from the +[`cartesi/machine-emulator`](https://github.com/cartesi/machine-emulator) +repository. + +The Cartesi Machine also includes a microarchitecture (uarch) that can +drive the main processor using a much-reduced RISC-V ISA. This is +necessary to enable verifiability in architectures that, due to +computational limitations, cannot emulate a main-processor instruction, +such as blockchains. Running the uarch until it halts, and then +resetting the uarch to its pristine state, is equivalent to executing +one instruction of the main processor. See [the microarchitecture +section](#the-microarchitecture) for details. + +The initialization of a Cartesi Machine typically loads the Linux kernel +into RAM, and a Linux root file-system (as a flash drive) from regular +files in the host file-system. The Linux kernel, `linux.bin`, is built by +the +[`cartesi/machine-linux-image`](https://github.com/cartesi/machine-linux-image) +repository. After it is done with its own initialization, the Linux +kernel cedes control to the `/usr/sbin/cartesi-init` program in the root +file-system. The root file-system `rootfs.ext2` contains all the data +files and programs that make up the Linux distribution. It is built by +the +[`cartesi/machine-rootfs-image`](https://github.com/cartesi/machine-rootfs-image) +repository. The components of the guest application can reside in the +root file-system itself, or in their own, separate file-systems. The +emulator can be instructed to execute whatever command is necessary to +start the guest application. For a complete description of the Cartesi +Machine architecture and the boot process, see the documentation for +[the guest perspective](#the-guest-perspective). + +There are two distinct modes of operation.
In the first mode, a Cartesi +Machine is initialized and tasked to run a guest application until the +machine *halts*. Inputs for the guest application can be provided as +additional flash drives with file-systems, or NVRAMs with raw data. +Outputs are only available to the host after the machine halts. Once it +halts, the machine cannot perform any additional computations. + +In the second mode of operation, the guest application runs in a loop. +In each iteration, it obtains a request carrying an input, performs any +necessary computations to service the request, and produces a number of +responses. Indeed, this is much like a server in which the guest +application can interact with the outside world. We say that a Cartesi +Machine operating in this mode is a *Rolling Cartesi Machine*. + +### Rolling Cartesi Machines and Cartesi Rollups + +Rolling Cartesi Machines accept two types of requests: advance-state +requests and inspect-state requests. Advance-state requests can create +persistent changes to the state of the underlying Cartesi Machine. In +contrast, inspect-state requests leave the state unchanged. + +Both types of request are serviced by the guest application, which +modifies the state of the Cartesi Machine while doing so. When servicing +an advance-state request, the guest application ultimately either +accepts or rejects it. The resulting modifications are kept only when +the request is accepted, and reverted when it is rejected. State +modifications are always reverted after inspect-state requests are +serviced. + +The stringent demands of reproducibility prevent a Cartesi Machine from +communicating *directly* with the outside world. Indeed, if two parties +were to run the same Cartesi Machine and then disagree on the data each +instance independently obtained from a network connection, there would +be no way to settle a dispute between them. 
Instead, Rolling Cartesi +Machines communicate with the outside world under controlled conditions, +through *Cartesi Rollups*. + +In a nutshell, Cartesi Rollups uses the blockchain to maintain a public +record of advance-state requests targeting each Rolling Cartesi Machine. +Both the order and the inputs carried by these requests are recorded and +made available in an indisputable fashion. Since Cartesi Machines are +deterministic, and since the inputs are agreed upon, the state of a +Rolling Cartesi Machine can be advanced in a well-defined way, always +producing the same set of responses, no matter who runs it. + +After producing each response to a request, the guest application asks +the machine to *yield* control back to the host. The host extracts the +response and *resumes* the machine. When done with a given input, the +guest application once again asks the machine to yield control back to +the host. At the same time, it notifies the host whether the input was +accepted or rejected. The host then prepares the input for the next +request, and *resumes* either the modified machine or a backup copy, so +the guest application can service the next request in a new iteration of +its loop. Inputs and responses are transferred in special memory ranges +(*CMIO* memory ranges). + +Advancing the state of a Rolling Cartesi Machine can produce four types +of response: *vouchers*, *notices*, *reports*, and *exceptions*. +Vouchers allow a Rolling Cartesi Machine to interact back with the +blockchain. A voucher issued by the guest application may, for example, +grant a user the right to withdraw tokens locked into a custodial smart +contract. Notices are used to register noteworthy changes to the state +of the guest application. A notice may be issued, for example, +announcing the demise of a character in a game or some other relevant +state transition. 
Disputes over the fact that a voucher or notice has +been generated while advancing the state of a Rolling Cartesi Machine +can be settled by Cartesi Rollups. Reports, in contrast, are used to +output any data that is irrelevant to the blockchain. A report may, for +example, provide diagnostic information on the reasons why an input has +been rejected. + +*Rejecting an input not only reverts the state, but also cancels all +vouchers and notices emitted while the request was serviced.* + +The advance-state requests serviced by a Rolling Cartesi Machine are +grouped into *epochs*. At the end of an epoch, the state of the machine +is finalized, so its state hash becomes known. From the finalized state +one can read the *output hashes root hash*, a single hash that commits +to every voucher and notice the machine has ever emitted. This hash is +the root of a Merkle tree maintained inside the machine, where each leaf +is the hash of one of the outputs, in the order they are emitted. (The +index of an output is its leaf position.) Given the contents of an +output, and a proof that its hash is the leaf at that index in the tree, +it is therefore possible to verify that the machine has in fact produced +that output. This is how Cartesi Rollups settles disputes over the +vouchers and notices a Rolling Cartesi Machine produces. + +Between state advances, it is possible to inspect the state of a Rolling +Cartesi Machine. This works by sending a query for processing inside the +Cartesi Machine. State inspection produces only reports and exceptions. +*All modifications to the state due to servicing queries are reverted +after the responses are collected.* + +An exception, which either kind of request may produce, signals an +irrecoverable error encountered by the guest application. + +## Documentation + +Cartesi Machines can be seen from three different perspectives: + +- *The host perspective* — This is the environment right outside the + Cartesi Machine emulator. 
It is most relevant to developers setting up + Cartesi Machines, running them, or manipulating their contents. It + includes the emulator’s API in all its flavors: C, C++, Lua, JSON-RPC, + and the command-line interface; +- *The guest perspective* — This is the environment inside the Cartesi + Machine. It encompasses Cartesi’s particular flavor of the RISC-V + architecture, as well as the organization of the Linux Operating + System that runs on top of it. It is most relevant to programmers + responsible for the application components that run off-chain but must + be verifiable. The cross-compiling toolchain, and the tools used to + build the Linux kernel and the Linux root file-systems are also + important from this perspective, even though they are used in the + host; +- *The blockchain perspective* — This is the view smart contracts have + of Cartesi Machines. It consists almost exclusively of the + manipulation of cryptographic hashes of the state of Cartesi Machines + and parts thereof. In particular, using only hash operations, the + blockchain can verify assertions concerning the contents of the state, + and can obtain the state hash that results from modifications to the + state. Notably, this includes direct verification by the blockchain of + RISC-V instructions performed by the uarch, and ZK proofs of batches + of RISC-V instructions performed by the main processor. + +As with every computer, the level of knowledge required to interact with +Cartesi Machines depends on the nature of the application being created. +Simple applications make only modest demands of each kind of developer. +Guest developers code a few scripts invoking pre-installed software +components. Host developers fill out a configuration file specifying the +location of the components needed to build a Cartesi Machine. Blockchain +developers instantiate one of the high-level contracts provided by +Cartesi. 
At the other extreme are the developers contributing to the +Cartesi ecosystem, who regularly write, build, and deploy custom +software components to run in the guest, or even change the Linux kernel +to support Cartesi-specific devices. Additionally, these developers +programmatically control the creation and execution of Cartesi Machines +in the host, and must also understand and use the hash-based state +manipulation primitives the blockchain needs. + +Although Cartesi’s goal is to shield platform users from as much +complexity as possible, there is value in making information available +to the greatest extent possible. To that end, this documentation of +Cartesi Machines aims to provide enough information to cover all three +perspectives, at all depths of understanding. + +# The host perspective + +Cartesi’s reference off-chain implementation of a Cartesi Machine is +based on software emulation. The emulator is written in C++23 with +well-insulated POSIX dependencies. The +[`cartesi/machine-emulator`](https://github.com/cartesi/machine-emulator) +repository can be used to build and install the Cartesi Machine +emulator. The emulator is implemented by a C++ class that can be +accessed in a variety of different ways. + +When linked to a C++ application, the emulator can be controlled +directly via the interface of the `cartesi::machine` class. C +applications can control the emulator in a similar way, by means of a +matching C API defined in the include file `cm.h`. The C API is stable +and complete. It is the basis for the creation of bindings in other +programming languages, most notably the Lua programming language. The +emulator can be accessed via a `cartesi` module that exposes a +`cartesi.machine` interface to Lua programs. Additionally, Cartesi +provides a JSON-RPC server that can run a Cartesi Machine instance that +is controlled remotely. The server supports JSON-RPC discovery so client +libraries can be generated automatically.
Finally, there is a +command-line utility (written in Lua) that can configure and run Cartesi +Machines for rapid prototyping. The C, C++, and Lua APIs, as well as the +command-line utility, can seamlessly instantiate local emulators or +connect to remote JSON-RPC servers. + +The documentation starts from the command-line utility, +`cartesi-machine`. This utility is used for most prototyping tasks. The +documentation then covers the Lua interface of `cartesi.machine`. The C, +C++, and JSON-RPC interfaces closely mirror the Lua interface documented +here, so this document does not cover them separately. The C API is +defined in the `cm.h` header. The JSON-RPC API supports discovery, so +client bindings can be generated from a running server. + +## Machine playground + +The setup of a new development environment is often a time-consuming +task. This is particularly true in the case of cross-development +environments (i.e., when the development happens in a host platform but +software runs in a different target platform). With this in mind, the +Cartesi team provides the `cartesi/machine-emulator-docs` Docker image +for use while reading this documentation. The Docker image enables +immediate experimentation with Cartesi Machines, as well as the +generation of the documentation itself. It comes with a pre-built +emulator and Lua interpreter accessible from the command line, as well +as a pre-built RAM image and root file-system. It also comes with the +cross-compiler for the RISC-V architecture on which the Cartesi Machine +is based.
+ +To enter the playground, open a terminal, download the Docker image from +Cartesi’s repository, and run it adequately mapping the current user and +group information, as well as making the host’s current directory +available inside the container: + +``` bash +docker pull cartesi/machine-emulator-docs:devel +``` + +``` bash +docker run \ + --hostname playground \ + --name playground \ + --rm \ + -e USER=$(id -u -n) \ + -e GROUP=$(id -g -n) \ + -e UID=$(id -u) \ + -e GID=$(id -g) \ + -v "$(pwd)":/work \ + -w /work \ + -it \ + cartesi/machine-emulator-docs:devel \ + /bin/bash +``` + +Once inside, you can execute the `cartesi-machine` utility as follows: + +``` bash +cartesi-machine --help | head -n 40 +``` + +``` text +Usage: + + /usr/share/lua/5.4/cartesi-machine.lua [options] [command] [arguments] + +where options are: + --help + display this information. + + --version + display cartesi machine version information and exit. + + --version-json + display cartesi machine semantic version and exit. + + --bash-completion + print a bash completion script for this program to stdout and exit. + Install with: source <(cartesi-machine --bash-completion) + + --assert-version=.[.] + exit with failure in case the cartesi machine emulator version mismatches + + --remote-spawn + spawns a remote cartesi machine, + when --remote-address is specified, it listens on the specified address, + otherwise it listens on "127.0.0.1:0". + + --remote-address=: + use a remote cartesi machine listening to : instead of + running a local cartesi machine. + + --remote-health-check + checks health of remote server and exit + + --remote-fork[=:] + fork the remote cartesi machine before the execution, + in case an address is specified the new forked server will be rebound to it. + + --remote-shutdown + shutdown the remote cartesi machine after the execution. + +... 
+``` + +A final check can also be performed to verify if the contents inside the +container are as expected: + +``` bash +sha256sum /usr/share/cartesi-machine/images/linux.bin +``` + +``` text +9fb5aaa623f35c0ef2138c8dadf1b0a1e388e0f51758cb6a5477ec5cecd3a029 /usr/share/cartesi-machine/images/linux.bin +``` + +``` bash +sha256sum /usr/share/cartesi-machine/images/rootfs.ext2 +``` + +``` text +fe62e2afa2d46eb85b79380f7fce5e47c9852c0b4c93ca751fe96923b555843c /usr/share/cartesi-machine/images/rootfs.ext2 +``` + +Note that, if the hashes of the files you are using do not match the +ones above, then when you attempt to replicate the examples in the +documentation, you will obtain different hashes. Moreover, the cycle +counts and outputs may also differ. + +## Command-line interface + +In the simplest usage scenario, the `cartesi-machine` command-line +utility can be used to define a Cartesi Machine and run it until it +halts. The command-line utility, however, is very versatile. It was +designed to simplify the most common prototyping tasks. + +The simplest invocation takes no arguments + +``` bash +cartesi-machine +``` + +and produces the output + +``` text + + . + / \ + / \ +\---/---\ /----\ + \ X \ + \----/ \---/---\ + \ / CARTESI + \ / MACHINE + ' + +Nothing to do. + +Halted +Cycles: 41860482 +``` + +The utility instantiates a default Cartesi Machine and runs it until it +halts. The Linux kernel boots, the Cartesi-provided `cartesi-init` +script prints the ASCII-art splash and reports there is nothing to do, +then gracefully halts the machine. This takes many millions of cycles to +complete: time mostly spent initializing the Linux kernel. The utility +regains control from the emulator, and prints the `Halted` message and +the cycle count. + +### Initialization + +The following command instructs `cartesi-machine` to build a Cartesi +Machine. The machine has 128MiB of RAM, uses `linux.bin` as the RAM +image, and uses `rootfs.ext2` as the root file-system. 
(`linux.bin` is +generated by +[machine-linux-image](https://github.com/cartesi/machine-linux-image) +and `rootfs.ext2` is generated by +[machine-rootfs-image](https://github.com/cartesi/machine-rootfs-image). +Sample files are available in the `cartesi/machine-emulator-docs` Docker +image, which can be built from the `doc/` directory of the +[machine-emulator](https://github.com/cartesi/machine-emulator) +repository.) Once initialization is complete, the machine executes the +command `ls /bin` and exits. + +``` bash +cartesi-machine \ + --quiet \ + --no-init-splash \ + --ram-length=128Mi \ + --ram-image="/usr/share/cartesi-machine/images/linux.bin" \ + --flash-drive="label:root,data_filename:/usr/share/cartesi-machine/images/rootfs.ext2" \ + -- ls /bin +``` + +The `--quiet` option suppresses the output of `cartesi-machine.lua` +itself, leaving visible only what is produced inside the machine. The +command-line option `--no-init-splash` instructs the utility to skip the +splash, keeping the output focused on the example at hand. The +`--ram-image`, `--ram-length`, and `--flash-drive` command-line options +have the values in the example as default, so these options can be +omitted. To remove these default settings, use the command-line options +`--no-ram-image` and `--no-root-flash-drive`, respectively. + +The simplified command-line is + +``` bash +cartesi-machine \ + --quiet \ + --no-init-splash \ + -- ls /bin +``` + +The output is + +``` text +'[' gunzip rgrep + addpart gzexe rm + apt gzip rmdir + apt-cache hardlink rollup + apt-cdrom head rollup-http-server + apt-config hex rollup-init + apt-get hostid run-parts + apt-key hostname runcon + apt-mark iconv savelog + arch id script +... +``` + +It shows the listing of directory `/bin/` inside the root file-system. +The listing was produced by the entrypoint command that follows the `--` +separator in the command line. 
By a method explained in great detail +later on (see [The guest perspective initialization](#initialization-1)) +the entrypoint is picked up by the Cartesi-provided +`/usr/sbin/cartesi-init`, which executes it before gracefully halting +the machine. + +> [!NOTE] +> +> In many of the documentation examples, the utilities invoked from the +> command-line executed by a Cartesi Machine are in the default search +> path for executables. (This is set up by the Cartesi-provided +> `/usr/sbin/cartesi-init` script itself.) When in doubt, or when using +> your own executables installed in custom locations, make sure to +> invoke them by using their full paths (e.g., `/bin/ls` or `/bin/sh` +> instead of simply `ls` and `sh`.) + +### Interactive sessions + +By default, the `cartesi-machine` utility executes the Cartesi Machine +in non-interactive mode. Verifiable computations must always be run in +non-interactive sessions. User interaction with a Cartesi Machine via +the console is, after all, not reproducible. Nevertheless, during +development, it is often convenient to directly interact with the +emulator, as if using a computer console. + +The command-line option `-i` (short for `--htif-console-getchar`) +instructs the emulator to monitor the console for input, and to make +this input available to the Linux kernel. Typically, this option will be +used in conjunction with the `--` separator and the command `sh`, +causing the Cartesi-provided `/usr/sbin/cartesi-init` script to drop +into an interactive shell. Interaction with the shell enables the +exploration of the Linux distribution from the inside. Exiting the shell +returns control back to `/usr/sbin/cartesi-init`, which then gracefully +halts the machine. + +For example, if an interactive session is started with the following +command + +``` bash +cartesi-machine \ + --no-init-splash \ + -i \ + -- sh +``` + +it drops into the shell. Running the command `ls /bin` causes the +listing of directory `/bin` to appear. 
Pressing Ctrl+D at the prompt +then causes the shell to exit. The output is + +``` text +$ ls /bin +'[' gunzip rgrep + addpart gzexe rm + apt gzip rmdir + apt-cache hardlink rollup + apt-cdrom head rollup-http-server + apt-config hex rollup-init + apt-get hostid run-parts + apt-key hostname runcon + apt-mark iconv savelog +... +``` + +> [!NOTE] +> +> When running in interactive mode, not even the final cycle count is +> reproducible. To avoid busy wait for new interactive input, the +> emulator sleeps from one Cartesi Machine timer interrupt to the next, +> skipping Cartesi Machine cycles forward so programs running inside +> stay *roughly* in sync with wall-clock time outside. This dynamic +> balancing act is sure to vary between executions and across different +> computers. + +### Flash drives + +The command-line option +`--flash-drive=label: