diff --git a/.github/workflows/RegenSnapshotGoldens.yml b/.github/workflows/RegenSnapshotGoldens.yml
new file mode 100644
index 000000000..eb09a1901
--- /dev/null
+++ b/.github/workflows/RegenSnapshotGoldens.yml
@@ -0,0 +1,184 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
+
+# Regenerate snapshot goldens stored at
+# ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens.
+#
+# Run order:
+#
+#   1. Bump `GOLDENS_VERSION` in
+#      `src/hyperlight_host/tests/snapshot_goldens/platform.rs` and any
+#      matching constants in
+#      `src/hyperlight_host/src/sandbox/snapshot/tripwires.rs`.
+#   2. Push the bump on a branch.
+#   3. Dispatch this workflow against the branch, passing the same
+#      version string as the `version` input.
+#
+# The workflow walks every (hv, cpu, config) cell, dumps the
+# canonical init+call snapshots locally, then `oras copy`s each as
+# its own GHCR tag named
+# `{version}-{hv}-{cpu}-{profile}-{kind}`.
+
+name: Regenerate Snapshot Goldens
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: Goldens version string. Must match GOLDENS_VERSION in source (e.g. "v1.0").
+        required: true
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RUST_BACKTRACE: full
+  GHCR_IMAGE: ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens
+
+permissions:
+  contents: read
+  packages: write
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  build-guests:
+    strategy:
+      matrix:
+        config: [debug, release]
+    uses: ./.github/workflows/dep_build_guests.yml
+    with:
+      config: ${{ matrix.config }}
+    secrets: inherit
+
+  dump-and-push:
+    needs: build-guests
+    strategy:
+      fail-fast: false
+      matrix:
+        hypervisor: [kvm, mshv3, hyperv-ws2025]
+        cpu: [amd, intel]
+        config: [debug, release]
+    runs-on: ${{ fromJson(
+      format('["self-hosted", "{0}", "X64", "1ES.Pool=hld-{1}-{2}", "JobId=regen-goldens-{3}-{4}-{5}-{6}"]',
+      matrix.hypervisor == 'hyperv-ws2025' && 'Windows' || 'Linux',
+      matrix.hypervisor == 'hyperv-ws2025' && 'win2025' || matrix.hypervisor == 'mshv3' && 'azlinux3-mshv' || matrix.hypervisor,
+      matrix.cpu,
+      matrix.config,
+      github.run_id,
+      github.run_number,
+      github.run_attempt)) }}
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: hyperlight-dev/ci-setup-workflow@v1.9.0
+        with:
+          rust-toolchain: "1.89"
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Fix cargo home permissions
+        if: runner.os == 'Linux'
+        run: sudo chown -R $(id -u):$(id -g) /opt/cargo || true
+
+      - name: Download Rust guests
+        uses: actions/download-artifact@v7
+        with:
+          name: rust-guests-${{ matrix.config }}
+          path: src/tests/rust_guests/bin/${{ matrix.config }}/
+
+      - name: Install oras
+        run: |
+          set -euo pipefail
+          if command -v oras >/dev/null 2>&1; then
+            echo "oras already installed: $(oras version)"
+            exit 0
+          fi
+          ORAS_VERSION=1.2.2
+          if [ "${{ runner.os }}" = "Windows" ]; then
+            curl -sSLO "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_windows_amd64.zip"
+            unzip -q "oras_${ORAS_VERSION}_windows_amd64.zip" -d oras_install
+            mv oras_install/oras.exe /usr/bin/oras.exe
+          else
+            curl -sSLO "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_linux_amd64.tar.gz"
+            mkdir -p oras_install
+            tar -xzf "oras_${ORAS_VERSION}_linux_amd64.tar.gz" -C oras_install
+            sudo install -m 0755 oras_install/oras /usr/local/bin/oras
+          fi
+          oras version
+
+      - name: Verify GOLDENS_VERSION matches input
+        env:
+          # Passed through the environment, not interpolated into the script,
+          # so the dispatch input can never be parsed as shell code.
+          VERSION_INPUT: ${{ inputs.version }}
+          PLATFORM_RS: src/hyperlight_host/tests/snapshot_goldens/platform.rs
+        run: |
+          set -euo pipefail
+          # Print the first GOLDENS_VERSION string literal read from stdin.
+          # Prints nothing (and still succeeds) when the constant is absent,
+          # so callers can report that case themselves.
+          extract_version() {
+            { grep -oE 'GOLDENS_VERSION: &str = "[^"]+"' || true; } \
+              | head -n1 | sed -E 's/.*"([^"]+)".*/\1/'
+          }
+          IN_SRC=$(extract_version < "${PLATFORM_RS}")
+          echo "GOLDENS_VERSION in source: ${IN_SRC}"
+          echo "version input: ${VERSION_INPUT}"
+          if [ -z "${IN_SRC}" ]; then
+            echo "::error::could not find GOLDENS_VERSION in ${PLATFORM_RS}"
+            exit 1
+          fi
+          if [ "${IN_SRC}" != "${VERSION_INPUT}" ]; then
+            echo "::error::version input does not match GOLDENS_VERSION in source"
+            exit 1
+          fi
+          # A version is frozen once main references it (see
+          # docs/snapshot-versioning.md), so never overwrite its tags.
+          git fetch --no-tags --depth=1 origin main
+          ON_MAIN=$(git show "FETCH_HEAD:${PLATFORM_RS}" 2>/dev/null | extract_version || true)
+          echo "GOLDENS_VERSION on main: ${ON_MAIN:-none}"
+          if [ "${ON_MAIN}" = "${VERSION_INPUT}" ]; then
+            echo "::error::${VERSION_INPUT} is frozen because main references it; bump GOLDENS_VERSION"
+            exit 1
+          fi
+
+      - name: Generate snapshots
+        run: |
+          set -euo pipefail
+          OUT="${{ github.workspace }}/snapshot-goldens-out"
+          mkdir -p "$OUT"
+          PROFILE_FLAG=$([ "${{ matrix.config }}" = "release" ] && echo "--release" || echo "")
+          FEATURES=$([ "${{ matrix.hypervisor }}" = "mshv3" ] && echo "mshv3" || echo "kvm")
+          if [ "${{ runner.os }}" = "Windows" ]; then FEATURES=""; fi
+          cargo test $PROFILE_FLAG -p hyperlight-host \
+            ${FEATURES:+--no-default-features --features "$FEATURES,build-metadata"} \
+            --test snapshot_goldens -- generate "$OUT"
+          ls -la "$OUT"
+
+      - name: Log in to GHCR
+        env:
+          GHCR_USER: ${{ github.actor }}
+          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin
+
+      - name: Push goldens to GHCR
+        run: |
+          set -euo pipefail
+          # nullglob: with no generated layouts the loop body is skipped
+          # (instead of seeing the literal pattern) and the check below fails the job.
+          shopt -s nullglob
+          OUT="${{ github.workspace }}/snapshot-goldens-out"
+          pushed=0
+          for layout in "$OUT"/*/; do
+            tag=$(basename "$layout")
+            echo "::group::push ${tag}"
+            oras copy --from-oci-layout "${layout%/}:${tag}" "${GHCR_IMAGE}:${tag}"
+            echo "::endgroup::"
+            pushed=$((pushed + 1))
+          done
+          if [ "$pushed" -eq 0 ]; then
+            echo "::error::no snapshot layouts were generated under ${OUT}"
+            exit 1
+          fi
diff --git a/.github/workflows/ValidatePullRequest.yml b/.github/workflows/ValidatePullRequest.yml index 8d3507717..b994860bb 100644 --- a/.github/workflows/ValidatePullRequest.yml +++ b/.github/workflows/ValidatePullRequest.yml @@ -89,7
+89,7 @@ jobs: # See: https://github.com/actions/runner/issues/2205 if: ${{ !cancelled() && !failure() }} strategy: - fail-fast: true + fail-fast: false matrix: hypervisor: ['hyperv-ws2025', mshv3, kvm] cpu: [amd, intel] diff --git a/CHANGELOG.md b/CHANGELOG.md index 53fef075d..42f7e3c9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Prerelease] - Unreleased +### Added +* `Snapshot::to_oci`, `Snapshot::from_oci`, and `Snapshot::from_oci_unchecked` for persisting and loading sandbox snapshots as OCI Image Layout directories by @ludfjig in https://github.com/hyperlight-dev/hyperlight/pull/1465 + ## [v0.15.0] - 2026-05-06 ### Added diff --git a/Cargo.lock b/Cargo.lock index d86f0fbcb..416ab015f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -504,6 +504,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -518,6 +519,18 @@ dependencies = [ "strsim", ] +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "clap_lex" version = "1.0.0" @@ -530,6 +543,27 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "const_format" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e" +dependencies = [ + "const_format_proc_macros", + "konst", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -730,6 +764,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "derive_arbitrary" version = "1.4.2" @@ -741,6 +810,37 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + 
"derive_builder_core", + "syn", +] + [[package]] name = "digest" version = "0.10.7" @@ -860,6 +960,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "escape8259" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" + [[package]] name = "euclid" version = "0.22.13" @@ -1154,6 +1260,18 @@ dependencies = [ "wasip3", ] +[[package]] +name = "getset" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" +dependencies = [ + "proc-macro-error2", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "gimli" version = "0.33.0" @@ -1361,6 +1479,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.4.0" @@ -1585,6 +1709,7 @@ dependencies = [ "gdbstub", "gdbstub_arch", "goblin", + "hex", "hyperlight-common", "hyperlight-component-macro", "hyperlight-guest-tracing", @@ -1594,12 +1719,14 @@ dependencies = [ "kvm-ioctls", "lazy_static", "libc", + "libtest-mimic", "log", "metrics", "metrics-exporter-prometheus", "metrics-util", "mshv-bindings", "mshv-ioctls", + "oci-spec", "opentelemetry", "opentelemetry-otlp", "opentelemetry-semantic-conventions", @@ -1612,6 +1739,7 @@ dependencies = [ "serde", "serde_json", "serial_test", + "sha2", "signal-hook-registry", "tempfile", "termcolor", @@ -1789,6 +1917,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = 
"1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -1912,6 +2046,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "konst" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb" +dependencies = [ + "konst_macro_rules", +] + +[[package]] +name = "konst_macro_rules" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" + [[package]] name = "kurbo" version = "0.11.3" @@ -2026,6 +2175,18 @@ dependencies = [ "libc", ] +[[package]] +name = "libtest-mimic" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" +dependencies = [ + "anstream 1.0.0", + "anstyle", + "clap", + "escape8259", +] + [[package]] name = "libz-sys" version = "1.1.23" @@ -2332,6 +2493,23 @@ dependencies = [ "ruzstd", ] +[[package]] +name = "oci-spec" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3da52b83ce3258fbf29f66ac784b279453c2ac3c22c5805371b921ede0d308" +dependencies = [ + "const_format", + "derive_builder", + "getset", + "regex", + "serde", + "serde_json", + "strum", + "strum_macros", + "thiserror", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -2780,6 +2958,28 @@ dependencies = [ "toml_edit", ] +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +dependencies = [ + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -3432,6 +3632,24 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "syn" version = "2.0.117" diff --git a/Justfile b/Justfile index 401897425..ded6d9edf 100644 --- a/Justfile +++ b/Justfile @@ -568,3 +568,52 @@ install-vcpkg: install-flatbuffers-with-vcpkg: install-vcpkg cd ../vcpkg && ./vcpkg install flatbuffers || cd - + +################################### +### SNAPSHOT GOLDEN HELPERS ### +################################### +# Custom-harness test binary that verifies / regenerates snapshot +# goldens stored on an OCI registry. The test binary itself never +# touches the network: it reads only from +# target/snapshot-goldens-cache/{version}/{tag}/. Populating that +# cache is the job of `snapshot-goldens-pull`, which shells out to +# `oras` (install from https://oras.land). + +# Default OCI registry image (without tag) that hosts the goldens. +default-snapshot-goldens-image := "ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens" + +# Verify the local snapshots against the goldens for the current +# GOLDENS_VERSION. 
Run `snapshot-goldens-pull` first to populate
+# the local cache; missing cache entries cause hard test failures
+# (the harness does not skip).
+snapshot-goldens target=default-target:
+    cargo test {{ if target == "release" { "--release" } else { "" } }} \
+        -p hyperlight-host --test snapshot_goldens
+
+# Pull goldens for the local platform's two tags (init + call) from
+# `image` into the cache used by `snapshot-goldens`. Auto-detects
+# hypervisor (override: HYPERLIGHT_GOLDENS_HV) and CPU vendor on Linux;
+# pass `profile=release` to fetch the release-profile tags.
+snapshot-goldens-pull image=default-snapshot-goldens-image profile="debug":
+    #!/usr/bin/env bash
+    set -euo pipefail
+    version=$(awk -F'"' '/GOLDENS_VERSION: &str =/{print $2; exit}' src/hyperlight_host/tests/snapshot_goldens/platform.rs)
+    # Probes must succeed even when they find nothing, or `set -e` aborts before the error below is printed.
+    hv=${HYPERLIGHT_GOLDENS_HV:-$(if [[ -e /dev/mshv ]]; then echo mshv; elif [[ -e /dev/kvm ]]; then echo kvm; fi)}
+    cpu=$(awk -F: '/vendor_id/{gsub(/ /,"",$2); print $2; exit}' /proc/cpuinfo 2>/dev/null | sed 's/GenuineIntel/intel/;s/AuthenticAMD/amd/' || true)
+    [[ -n "${hv:-}" && -n "${cpu:-}" ]] || { echo "snapshot-goldens-pull: could not detect hv/cpu (set HYPERLIGHT_GOLDENS_HV)" >&2; exit 1; }
+    for kind in init call; do
+        tag="${version}-${hv}-${cpu}-{{ profile }}-${kind}"
+        dir="target/snapshot-goldens-cache/${version}/${tag}"
+        mkdir -p "${dir}"
+        oras copy --to-oci-layout "{{ image }}:${tag}" "${dir}:${tag}"
+    done
+
+# Generate the canonical local snapshots into the cache that
+# `snapshot-goldens` reads from. Locally, `snapshot-goldens-generate`
+# followed by `snapshot-goldens` is a pure local round-trip with
+# no registry involved. The regen workflow calls the harness
+# directly with an explicit out-dir for staging.
+snapshot-goldens-generate target=default-target: + cargo test {{ if target == "release" { "--release" } else { "" } }} \ + -p hyperlight-host --test snapshot_goldens -- generate diff --git a/docs/snapshot-oci-format.md b/docs/snapshot-oci-format.md new file mode 100644 index 000000000..f7a701bc9 --- /dev/null +++ b/docs/snapshot-oci-format.md @@ -0,0 +1,96 @@ +# Hyperlight snapshot on-disk format + +Hyperlight serialises a `Snapshot` to disk as an [OCI Image Layout] +directory. `Snapshot::to_oci` writes one. `Snapshot::from_oci` and +`Snapshot::from_oci_unchecked` read one back. + +[OCI Image Layout]: https://github.com/opencontainers/image-spec/blob/main/image-layout.md + +## Directory layout + +```text +path/ + oci-layout {"imageLayoutVersion":"1.0.0"} + index.json one manifest descriptor per tag, + tagged via the OCI standard + `org.opencontainers.image.ref.name` + annotation + blobs/sha256/ + <manifest-digest> OCI image manifest JSON + <config-digest> Hyperlight config JSON + <memory-digest> raw memory bytes + (`memory_size` bytes) +``` + +Three blob kinds per tag: + +* **manifest** (`application/vnd.oci.image.manifest.v1+json`). Tiny JSON + pointer record selected via `index.json`. References one config and + one layer by digest. +* **config** (`application/vnd.hyperlight.snapshot.config.v1+json`). The + snapshot descriptor: arch, ABI version, entrypoint sregs, memory + layout, registered host functions, snapshot generation counter. + Loaded eagerly and fully parsed. +* **layer / memory** (`application/vnd.hyperlight.snapshot.memory.v1`). + The raw guest memory image, exactly `memory_size` bytes. mmap'd on + restore. + +Blob filenames are the sha256 of the blob bytes, so identical blobs +across tags are stored once.
+ +## What is one snapshot + +A single saved `Snapshot` consists of exactly: + +* one entry in `index.json`, carrying the `tag` as + `org.opencontainers.image.ref.name`, +* one **manifest** blob (referenced by that index entry), +* one **config** blob (referenced by the manifest's `config` field), +* one **layer** blob (the only entry in the manifest's `layers` + array, holding the raw memory image). + +Saving two snapshots under different tags into the same `path` +produces two index entries and two manifests. Configs and layers are +deduplicated by content, so identical bytes are stored once and +referenced by both manifests. + +Saving the same tag a second time replaces that tag's index entry +and writes a fresh manifest. The previous manifest, and any of its +config or layer blobs that no other tag references, become orphans +in `blobs/sha256/`. + +## Write semantics + +`Snapshot::to_oci(path, tag)` opens or creates the OCI layout at +`path` and writes one snapshot under `tag`. The parent directory of +`path` must already exist. `path` itself is created if absent. An +existing layout at `path` is preserved: other tags are kept, and a +tag equal to `tag` is replaced. + +`index.json` is rewritten via a tmp file plus `rename`, the commit +point for the whole operation. A crash before that rename leaves the +prior layout intact. A crash after it leaves the new layout intact. + +Replaced tags leave orphan blobs behind. To compact, remove the +directory and re-save. Concurrent writers to the same `path` are +unsupported. + +This mirrors the merge behaviour of `containers/image` (skopeo, +podman), `go-containerregistry` (crane), and `regclient`. + +## Read semantics + +`Snapshot::from_oci(path, tag)` verifies sha256 for manifest, config, +and snapshot blobs. 
`Snapshot::from_oci_unchecked` skips the digest +verification, trading integrity for performance, and keeps every +other check (OCI structure, descriptor sizes, schema versions, arch / +hypervisor / ABI tags, layout bounds, entrypoint bounds). + +A missing tag or duplicate tag in `index.json` is rejected. + +## Portability + +Snapshot images are bound to a specific CPU architecture and +hypervisor. Both are recorded in the config blob and checked at load +time, with mismatches rejected with a clear error. The hypervisor +tag (`kvm`, `mshv`, `whp`) constrains the host OS. diff --git a/docs/snapshot-versioning.md b/docs/snapshot-versioning.md new file mode 100644 index 000000000..e75d28714 --- /dev/null +++ b/docs/snapshot-versioning.md @@ -0,0 +1,228 @@ +# Snapshot versioning + +Hyperlight snapshots are written to disk as OCI image layouts and may be +loaded by a different build than the one that produced them. This +document describes how to evolve the snapshot format while keeping +existing snapshots loadable, or while rejecting them with a clear error. + +## What is versioned + +A snapshot carries three independently evolvable version markers: + +* **Memory blob ABI**, `SNAPSHOT_ABI_VERSION` (a `u32` inside the + config blob, defined in + [src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs](../src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs)). + This is the host/guest runtime contract baked into the captured + memory: the `HyperlightPEB` layout (the struct host and guest share + to exchange state, field offsets and types), the `OutBAction` port + numbers (the I/O ports the guest writes to for `Log`, `CallFunction`, + `Abort`, `DebugPrint`), the layout of the sandbox memory regions + (stack, heap, guest binary, input and output buffers, page tables), + and the calling convention used for guest function entry. 
The loader + trusts the captured bytes to match this contract, so any change here + invalidates older snapshots unless an explicit compat path translates + them. +* **Snapshot blob encoding**, `MT_SNAPSHOT_V1` + (`application/vnd.hyperlight.snapshot.memory.v1`), aliased as + `MT_SNAPSHOT_CURRENT`. This is the on-wire format of the snapshot + blob: framing, section ordering, alignment, dirty/zero-page elision, + anything about how the bytes are packed inside the OCI layer. +* **Config schema**, `MT_CONFIG_V1` + (`application/vnd.hyperlight.snapshot.config.v1+json`), aliased as + `MT_CONFIG_CURRENT`. This is the JSON shape of the config blob: + field names, types, required vs optional, the descriptors the loader + needs in order to reconstruct the sandbox (memory sizes, buffer + sizes, `abi_version`, `hyperlight_version`, etc.). Renaming a field, + changing its type, or adding a required field is a schema change and + bumps this constant. + +The `OCI_LAYOUT_VERSION` constant is pinned by the OCI image-layout +spec at `1.0.0`. + +The config blob also records `hyperlight_version`, the `CARGO_PKG_VERSION` +of the host crate at write time. This is informational only. The loader +records it for diagnostics and does not gate loading on it. + +## Enforcement + +The format is large and easy to change by accident. Two mechanisms +catch a change to it so reviewers do not have to spot every break by +eye, and so a developer who breaks the format unintentionally finds +out at build time rather than in production. + +Compile-time tripwires in +[src/hyperlight_host/src/sandbox/snapshot/tripwires.rs](../src/hyperlight_host/src/sandbox/snapshot/tripwires.rs) +hold a copy of every value that defines the format: +`SNAPSHOT_ABI_VERSION`, the snapshot and config media-type strings, the +OCI layout version, every `HyperlightPEB` field offset and the struct's +total size, and every `OutBAction` discriminant. 
If the source value +drifts from the copy in `tripwires.rs`, the crate fails to compile. + +The snapshot golden verify test +(`cargo test -p hyperlight-host --test snapshot_goldens`) loads +snapshots from a local cache (populated by `just snapshot-goldens-pull`, +which fetches the tag set for the current `GOLDENS_VERSION` from GHCR) +and runs them through the current loader. If the new loader cannot +decode the old bytes, the test fails. + +## Changing the format + +When you change anything on the list above, you have three options. + +### Option 1: avoid the break + +Restructure the change so the on-disk contract stays put. Prefer this +whenever possible. + +### Option 2: backwards-compatible break + +You break the ABI for new snapshots, and you teach the loader to +accept the older version as well by translating it into the current +contract on the fly. For example, if you renumber the `OutBAction` +ports, the host's port dispatch keeps a match arm for the old port +number alongside the new one, so a resumed v1 guest that still writes +to the old port is handled correctly. + +Steps: + +1. Make the source change. +2. Update `Snapshot::to_oci` to write the new format. +3. Bump `SNAPSHOT_ABI_VERSION`. The writer stamps this value into + every config blob it produces. +4. Update `Snapshot::from_oci` to load both the old and the new + format, dispatching on `abi_version`. +5. Update the tripwire assertions in `tripwires.rs` and any affected + tests to match the new values. +6. Bump `GOLDENS_VERSION` to the next major and push fresh goldens. See + [Goldens version numbering](#goldens-version-numbering) and + [Regenerating goldens](#regenerating-goldens). +7. Keep the old goldens on GHCR and extend the verify test to exercise + them as well, so the compatibility path stays covered. See + [Verifying multiple golden versions](#verifying-multiple-golden-versions). + +Old snapshots on disk continue to load. New snapshots use the new +contract. 
The compatibility path is now part of the supported surface +and must stay correct until you formally drop the old major. + +### Option 3: hard break + +You change the contract and the loader rejects old snapshots outright. +Using the same `OutBAction` example, the host's port dispatch only +matches on the new port number, and a resumed v1 guest writing to the +old port has nowhere to land. + +Steps: + +1. Make the source change. +2. Update `Snapshot::to_oci` to write the new format. +3. Bump `SNAPSHOT_ABI_VERSION`. +4. Update the tripwire assertions in `tripwires.rs` and any affected + tests to match the new values. +5. Bump `GOLDENS_VERSION` to the next major and push fresh goldens. See + [Goldens version numbering](#goldens-version-numbering) and + [Regenerating goldens](#regenerating-goldens). +6. Record the break in `CHANGELOG.md`. Anyone holding old snapshots on + disk has to regenerate them against the new build. + +The loader's single-version check enforces the rejection. An old +snapshot loaded against the new build fails the +`abi_version == SNAPSHOT_ABI_VERSION` test with a clear error. + +## Regenerating goldens + +The verify test (`cargo test -p hyperlight-host --test snapshot_goldens`) +loads the tag set `{GOLDENS_VERSION}-{hv}-{cpu}-{profile}-{kind}` from a +local cache that `just snapshot-goldens-pull` populates from GHCR. After +bumping `GOLDENS_VERSION`, the matching tags must be pushed before the +verify job can pass. + +### Iterating locally + +`just snapshot-goldens-generate` regenerates the cache for the current +`GOLDENS_VERSION` from the local source, so the verify test runs green +against your in-progress changes on your own platform. Use this loop +for iteration that does not need to cross hypervisor boundaries. To +validate the change on every platform, dispatch the regen workflow +(see [Push procedure](#push-procedure)). + +### Goldens version numbering + +`GOLDENS_VERSION` follows a `vMAJOR.MINOR` scheme. 
The tag set on GHCR +for a given version is keyed by the full string, so `v1.0`, `v1.1`, and +`v2.0` are independent namespaces that never collide. + +* Bump **MAJOR** when the snapshot ABI changes (Option 2 or Option 3 + above). The old tag set stays on GHCR untouched. +* Bump **MINOR** when the set of golden checks changes but the ABI does + not (for example, a new check is added). The new tag set contains + every check, including the unchanged ones, regenerated against the + current source. + +A version is frozen once `main` references it. The regen workflow, +before every push, reads `GOLDENS_VERSION` from the tip of `main` and +refuses to push to that tag. Any other tag, including the version the +current PR is introducing, is in-flight and may be overwritten freely. +This lets a developer iterate on a v1 to v2 bump by pushing v2 as many +times as needed, with no risk of touching v1. + +Overwriting a tag leaves the previous manifest on GHCR as an orphan. +A scheduled cleanup workflow that reaps orphans and abandoned in-flight +tags is a follow-up. + +### Push procedure + +1. Land the source bumps on a branch. +2. Dispatch the `Regenerate Snapshot Goldens` workflow against that + branch. The workflow walks every supported + `(hypervisor, cpu, profile)` combination on the self-hosted runner + pool, generates the canonical init and call snapshots locally with + `cargo test --test snapshot_goldens -- generate <out-dir>`, and pushes + each OCI layout to GHCR using `oras copy`. Before every push it + reads `GOLDENS_VERSION` from the tip of `main` and refuses the push + if the target tag matches. +3. The verify job on the PR can now find the tags and passes. + +The workflow takes a `version` input that must equal `GOLDENS_VERSION` +in source. This guards against pushing a tag set the test binary would +ignore. + +## Adding a new check under the current ABI + +Adding a new entry to `CHECKS` does not change the snapshot ABI.
It +does change the set of tags the verify test expects, so it requires a +minor `GOLDENS_VERSION` bump. + +Steps: + +1. Add the entry to `CHECKS` in + `src/hyperlight_host/tests/snapshot_goldens/`. +2. Bump `GOLDENS_VERSION` minor (e.g. `v1.2` to `v1.3`). The verify + test now looks for tags under the new prefix and fails until they + exist. +3. A maintainer dispatches `Regenerate Snapshot Goldens` against the + branch with `version` set to the new `GOLDENS_VERSION`. The workflow + runs every check on every combination and publishes a complete tag set + under the new prefix. The previous tag set stays on GHCR untouched. +4. The verify job finds the new tag set and passes. + +The previous minor's tags can be deleted from GHCR once nothing depends +on them. + +## Verifying multiple golden versions + +The verify test pulls exactly one tag set, the one for the current +`GOLDENS_VERSION`. That covers the hard-break case (Option 3), where a +fresh tag set replaces the previous one. + +The backwards-compatible case (Option 2) needs more. A v1 loader path +is only correct if real v1 goldens load against the new build, which +means verifying against multiple versions in the same run. + +The intended design is to replace the single `GOLDENS_VERSION` constant +with a slice of currently supported major versions, e.g. +`pub const GOLDENS_VERSIONS: &[&str] = &["v1.3", "v2.0"];`, and have +the verify test run every check against every entry. Dropping an old +major is then a one-line removal from that slice. + +The single-version variant suffices for Option 3. Build the +multi-version variant the first time you take Option 2. 
diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 7e709b449..34fbb267c 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -48,9 +48,13 @@ thiserror = "2.0.18" chrono = { version = "0.4", optional = true } anyhow = "1.0" metrics = "0.24.6" +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" elfcore = { version = "2.0", optional = true } uuid = { version = "1.23.2", features = ["v4"] } +oci-spec = { version = "0.8", default-features = false, features = ["image"] } +sha2 = "0.10" +hex = "0.4" [target.'cfg(windows)'.dependencies] windows = { version = "0.62", features = [ @@ -82,7 +86,6 @@ mshv-ioctls = { version = "0.6", optional = true} [dev-dependencies] uuid = { version = "1.23.2", features = ["v4"] } signal-hook-registry = "1.4.8" -serde = "1.0" iced-x86 = { version = "1.21", default-features = false, features = ["std", "code_asm"] } proptest = "1.11.0" tempfile = "3.27.0" @@ -106,6 +109,7 @@ metrics-util = "0.20.4" metrics-exporter-prometheus = { version = "0.18.3", default-features = false } serde_json = "1.0" hyperlight-component-macro = { workspace = true } +libtest-mimic = "0.8.2" [target.'cfg(windows)'.dev-dependencies] windows = { version = "0.62", features = [ @@ -144,3 +148,8 @@ guest-counter = ["hyperlight-common/guest-counter"] [[bench]] name = "benchmarks" harness = false + +[[test]] +name = "snapshot_goldens" +path = "tests/snapshot_goldens/main.rs" +harness = false diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index 462e8908d..0f9ca5b2a 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -153,6 +153,15 @@ fn sandbox_lifecycle_benchmark(c: &mut Criterion) { ); } + // Isolates the cost of building a MultiUseSandbox from an + // already-resident Snapshot. The Snapshot is loaded outside the + // timed region. 
+    for size in SandboxSize::all() {
+        group.bench_function(format!("sandbox_from_snapshot/{}", size.name()), |b| {
+            bench_sandbox_from_snapshot(b, size)
+        });
+    }
+
     group.finish();
 }
 
@@ -347,6 +356,25 @@ fn bench_snapshot_restore(b: &mut criterion::Bencher, size: SandboxSize) {
     });
 }
 
+fn bench_sandbox_from_snapshot(b: &mut criterion::Bencher, size: SandboxSize) {
+    use hyperlight_host::HostFunctions;
+    use hyperlight_host::sandbox::snapshot::Snapshot;
+
+    let dir = tempfile::tempdir().unwrap();
+    let snap_path = dir.path().join("bench");
+    {
+        let mut sbox = create_multiuse_sandbox_with_size(size);
+        let snapshot = sbox.snapshot().unwrap();
+        snapshot.to_oci(&snap_path, "latest").unwrap();
+    }
+    let loaded = std::sync::Arc::new(Snapshot::from_oci(&snap_path, "latest").unwrap());
+
+    b.iter(|| {
+        let _ =
+            MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap();
+    });
+}
+
 fn snapshots_benchmark(c: &mut Criterion) {
     let mut group = c.benchmark_group("snapshots");
 
@@ -551,6 +579,118 @@ fn shared_memory_benchmark(c: &mut Criterion) {
     group.finish();
 }
 
+// ============================================================================
+// Benchmark Category: Snapshot Files
+// ============================================================================
+
+fn snapshot_file_benchmark(c: &mut Criterion) {
+    use hyperlight_host::HostFunctions;
+    use hyperlight_host::sandbox::snapshot::Snapshot;
+
+    let mut group = c.benchmark_group("snapshot_files");
+
+    // Pre-create OCI snapshot images for all sizes.
+    let dirs: Vec<_> = SandboxSize::all()
+        .iter()
+        .map(|size| {
+            let dir = tempfile::tempdir().unwrap();
+            let snap_path = dir.path().join(size.name());
+            let snapshot = {
+                let mut sbox = create_multiuse_sandbox_with_size(*size);
+                sbox.snapshot().unwrap()
+            };
+            snapshot.to_oci(&snap_path, "latest").unwrap();
+            (dir, snapshot, snap_path)
+        })
+        .collect();
+
+    // Benchmark: save_snapshot. Wipe the layout between iterations
+    // so each save measures a fresh write rather than a tag-append.
+    for (i, size) in SandboxSize::all().iter().enumerate() {
+        let snap_dir = tempfile::tempdir().unwrap();
+        let path = snap_dir.path().join("bench");
+        let snapshot = &dirs[i].1;
+        group.bench_function(format!("save_snapshot/{}", size.name()), |b| {
+            b.iter_batched(
+                || {
+                    let _ = std::fs::remove_dir_all(&path);
+                },
+                |_| snapshot.to_oci(&path, "latest").unwrap(),
+                criterion::BatchSize::PerIteration,
+            );
+        });
+    }
+
+    // Benchmark: load_snapshot (parse manifest + config + mmap blob).
+    for (i, size) in SandboxSize::all().iter().enumerate() {
+        let snap_path = dirs[i].2.clone();
+        group.bench_function(format!("load_snapshot/{}", size.name()), |b| {
+            b.iter(|| {
+                let _ = Snapshot::from_oci(&snap_path, "latest").unwrap();
+            });
+        });
+    }
+
+    // Benchmark: load_snapshot_unchecked (skip blob digest verification).
+    for (i, size) in SandboxSize::all().iter().enumerate() {
+        let snap_path = dirs[i].2.clone();
+        group.bench_function(format!("load_snapshot_unchecked/{}", size.name()), |b| {
+            b.iter(|| {
+                let _ = Snapshot::from_oci_unchecked(&snap_path, "latest").unwrap();
+            });
+        });
+    }
+
+    // Benchmark: cold_start_via_evolve (new + evolve + call)
+    for size in SandboxSize::all() {
+        group.bench_function(format!("cold_start_via_evolve/{}", size.name()), |b| {
+            b.iter(|| {
+                let mut sbox = create_multiuse_sandbox_with_size(size);
+                sbox.call::<String>("Echo", "hello\n".to_string()).unwrap();
+            });
+        });
+    }
+
+    // Benchmark: cold_start_via_snapshot (load + from_snapshot + call)
+    for (i, size) in SandboxSize::all().iter().enumerate() {
+        let snap_path = dirs[i].2.clone();
+        group.bench_function(format!("cold_start_via_snapshot/{}", size.name()), |b| {
+            b.iter(|| {
+                let loaded = Snapshot::from_oci(&snap_path, "latest").unwrap();
+                let mut sbox = MultiUseSandbox::from_snapshot(
+                    std::sync::Arc::new(loaded),
+                    HostFunctions::default(),
+                    None,
+                )
+                .unwrap();
+                sbox.call::<String>("Echo", "hello\n".to_string()).unwrap();
+            });
+        });
+    }
+
+    // Benchmark: cold_start_via_snapshot_unchecked (load unchecked + from_snapshot + call)
+    for (i, size) in SandboxSize::all().iter().enumerate() {
+        let snap_path = dirs[i].2.clone();
+        group.bench_function(
+            format!("cold_start_via_snapshot_unchecked/{}", size.name()),
+            |b| {
+                b.iter(|| {
+                    let loaded = Snapshot::from_oci_unchecked(&snap_path, "latest").unwrap();
+                    let mut sbox = MultiUseSandbox::from_snapshot(
+                        std::sync::Arc::new(loaded),
+                        HostFunctions::default(),
+                        None,
+                    )
+                    .unwrap();
+                    sbox.call::<String>("Echo", "hello\n".to_string()).unwrap();
+                });
+            },
+        );
+    }
+
+    group.finish();
+}
+
 criterion_group! {
     name = benches;
     config = Criterion::default();
@@ -561,6 +701,7 @@ criterion_group! {
         guest_call_benchmark_large_param,
         function_call_serialization_benchmark,
         sample_workloads_benchmark,
-        shared_memory_benchmark
+        shared_memory_benchmark,
+        snapshot_file_benchmark
 }
 criterion_main!(benches);
diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs
index d9c69de2e..e5c224b22 100644
--- a/src/hyperlight_host/src/mem/shared_mem.rs
+++ b/src/hyperlight_host/src/mem/shared_mem.rs
@@ -1568,7 +1568,6 @@ impl ReadonlySharedMemory {
     /// The file's length must be a non-zero multiple of `PAGE_SIZE`.
     /// `guest_mapped_size` must be a non-zero multiple of `PAGE_SIZE`
     /// no greater than the file's length.
-    #[cfg_attr(not(test), expect(dead_code))]
     pub(crate) fn from_file(file: &std::fs::File, guest_mapped_size: usize) -> Result<Self> {
         let len: usize = file
             .metadata()
diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
index 62a4600d0..4cab2acbb 100644
--- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
+++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
@@ -185,6 +185,20 @@ impl MultiUseSandbox {
     /// # Ok(())
     /// # }
     /// ```
+    ///
+    /// From a snapshot loaded from disk:
+    ///
+    /// ```no_run
+    /// # use std::sync::Arc;
+    /// # use hyperlight_host::{HostFunctions, MultiUseSandbox};
+    /// # use hyperlight_host::sandbox::snapshot::Snapshot;
+    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// let snapshot = Arc::new(Snapshot::from_oci("./guest_snapshot", "latest")?);
+    /// let mut sandbox = MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None)?;
+    /// let result: String = sandbox.call("Echo", "hello".to_string())?;
+    /// # Ok(())
+    /// # }
+    /// ```
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     pub fn from_snapshot(
         snapshot: Arc<Snapshot>,
diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/config.rs b/src/hyperlight_host/src/sandbox/snapshot/file/config.rs
new file mode 100644
index 000000000..0b7b2fe01
--- /dev/null
+++ b/src/hyperlight_host/src/sandbox/snapshot/file/config.rs
@@ -0,0 +1,875 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+use hyperlight_common::flatbuffer_wrappers::function_types::{ParameterType, ReturnType};
+use hyperlight_common::flatbuffer_wrappers::host_function_definition::HostFunctionDefinition;
+use hyperlight_common::vmem::PAGE_SIZE;
+use serde::{Deserialize, Serialize};
+
+use super::media_types::SNAPSHOT_ABI_VERSION;
+use crate::hypervisor::regs::{CommonSegmentRegister, CommonSpecialRegisters, CommonTableRegister};
+use crate::mem::layout::SandboxMemoryLayout;
+use crate::mem::memory_region::MemoryRegionFlags;
+
+// --- Arch and hypervisor identifiers --------------------------------
+
+/// Guest architecture the snapshot was captured for. Checked on load
+/// against the running host.
+///
+/// Serialized as a lowercase string: `"x86_64"`, `"aarch64"` or
+/// `"i686"`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub(super) enum Arch {
+    X86_64,
+    Aarch64,
+    I686,
+}
+
+impl Arch {
+    /// Architecture this build targets, resolved at compile time.
+    ///
+    /// The `i686-guest` feature takes precedence over the host
+    /// `target_arch`; without it the result follows `target_arch`.
+    /// Exactly one of the `cfg` blocks below survives compilation and
+    /// becomes the function's tail expression.
+    pub(super) fn current() -> Self {
+        #[cfg(feature = "i686-guest")]
+        {
+            Self::I686
+        }
+        #[cfg(all(not(feature = "i686-guest"), target_arch = "x86_64"))]
+        {
+            Self::X86_64
+        }
+        #[cfg(all(not(feature = "i686-guest"), target_arch = "aarch64"))]
+        {
+            Self::Aarch64
+        }
+    }
+}
+
+/// Hypervisor backend the snapshot was captured under. Checked on
+/// load because vCPU register state is backend-specific.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub(super) enum Hypervisor {
+    Kvm,
+    Mshv,
+    Whp,
+}
+
+impl Hypervisor {
+    /// Map the hypervisor reported by `get_available_hypervisor()` to
+    /// its snapshot identifier. Returns `None` when no hypervisor is
+    /// available. Each arm is compiled in only for the configuration
+    /// that can produce it.
+    pub(super) fn current() -> Option<Self> {
+        #[allow(unused_imports)]
+        use crate::hypervisor::virtual_machine::HypervisorType;
+        use crate::hypervisor::virtual_machine::get_available_hypervisor;
+
+        match get_available_hypervisor() {
+            #[cfg(kvm)]
+            Some(HypervisorType::Kvm) => Some(Self::Kvm),
+            #[cfg(mshv3)]
+            Some(HypervisorType::Mshv) => Some(Self::Mshv),
+            #[cfg(target_os = "windows")]
+            Some(HypervisorType::Whp) => Some(Self::Whp),
+            None => None,
+        }
+    }
+
+    /// Upper-case display name used in error messages.
+    fn name(&self) -> &'static str {
+        match self {
+            Self::Kvm => "KVM",
+            Self::Mshv => "MSHV",
+            Self::Whp => "WHP",
+        }
+    }
+}
+
+// --- Config JSON shape ----------------------------------------------
+
+/// Top-level Hyperlight snapshot config JSON. Lives at
+/// `blobs/sha256/<digest>` with media type
+/// `application/vnd.hyperlight.snapshot.config.v1+json`.
+///
+/// In OCI terms this is the "image config" blob that the manifest's
+/// `config` descriptor points to. It describes the accompanying
+/// memory layer (the snapshot bytes) and everything the loader needs
+/// to reconstruct a runnable `Snapshot`.
+#[derive(Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub(super) struct OciSnapshotConfig {
+    /// Hyperlight crate version that produced this config. Recorded
+    /// for diagnostics. Not checked on load.
+    pub(super) hyperlight_version: String,
+    pub(super) arch: Arch,
+    /// Memory blob ABI version. See [`SNAPSHOT_ABI_VERSION`].
+    pub(super) abi_version: u32,
+    pub(super) hypervisor: Hypervisor,
+    /// Top of the guest stack, in guest virtual address space.
+    pub(super) stack_top_gva: u64,
+    pub(super) entrypoint: Entrypoint,
+    pub(super) layout: MemoryLayout,
+    /// Total size of the memory blob in bytes (including the guest
+    /// page-table tail, if any). Equal to `self.memory.mem_size()`.
+    pub(super) memory_size: u64,
+    /// Names and signatures of host functions registered when this
+    /// snapshot was taken. Validated against the loader's registry.
+    pub(super) host_functions: Vec<HostFunction>,
+    /// Generation counter for the snapshot. Restored verbatim into
+    /// the `Snapshot` so guest-visible bookkeeping at
+    /// `SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET` is continuous across
+    /// save/load.
+    pub(super) snapshot_generation: u64,
+}
+
+/// What the loader should do with the restored sandbox: jump to the
+/// guest entrypoint, or resume a paused call with captured sregs.
+/// The enum shape enforces that `Call` carries sregs and `Initialise`
+/// does not.
+#[derive(Serialize, Deserialize)]
+#[serde(tag = "kind", rename_all = "lowercase", deny_unknown_fields)]
+pub(super) enum Entrypoint {
+    Initialise { addr: u64 },
+    Call { addr: u64, sregs: Box<Sregs> },
+}
+
+/// Sizes and permissions of the regions inside the snapshot blob,
+/// enough for the loader to rebuild a `SandboxMemoryLayout`.
+#[derive(Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub(super) struct MemoryLayout {
+    pub(super) input_data_size: usize,
+    pub(super) output_data_size: usize,
+    pub(super) heap_size: usize,
+    pub(super) code_size: usize,
+    pub(super) init_data_size: usize,
+    /// Memory region flag bits. `None` means default permissions.
+    // NOTE(review): `u32` is assumed to be the bit type accepted by
+    // `MemoryRegionFlags::from_bits` -- confirm against `memory_region`.
+    pub(super) init_data_permissions: Option<u32>,
+    pub(super) scratch_size: usize,
+    pub(super) snapshot_size: usize,
+    pub(super) pt_size: Option<usize>,
+}
+
+/// Name and signature of one host function registered when the
+/// snapshot was taken. The loader validates these against the
+/// registry of the sandbox it is restoring into.
+#[derive(Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub(super) struct HostFunction {
+    function_name: String,
+    parameter_types: Vec<ParameterTypeRepr>,
+    return_type: ReturnTypeRepr,
+}
+
+/// JSON-friendly mirror of
+/// [`hyperlight_common::flatbuffer_wrappers::function_types::ParameterType`].
+/// Kept local so we don't have to plumb serde through `hyperlight_common`.
+/// The `match`es below are exhaustive: any new variant upstream forces
+/// an explicit decision here.
+///
+/// Variants serialize in snake_case, so `UInt` is `"u_int"`, `ULong`
+/// is `"u_long"` and `VecBytes` is `"vec_bytes"`.
+#[derive(Serialize, Deserialize, Copy, Clone)]
+#[serde(rename_all = "snake_case")]
+enum ParameterTypeRepr {
+    Int,
+    UInt,
+    Long,
+    ULong,
+    Float,
+    Double,
+    String,
+    Bool,
+    VecBytes,
+}
+
+/// JSON-friendly mirror of
+/// [`hyperlight_common::flatbuffer_wrappers::function_types::ReturnType`].
+#[derive(Serialize, Deserialize, Copy, Clone)]
+#[serde(rename_all = "snake_case")]
+enum ReturnTypeRepr {
+    Int,
+    UInt,
+    Long,
+    ULong,
+    Float,
+    Double,
+    String,
+    Bool,
+    Void,
+    VecBytes,
+}
+
+/// Runtime parameter type -> JSON mirror (used when saving).
+impl From<&ParameterType> for ParameterTypeRepr {
+    fn from(p: &ParameterType) -> Self {
+        match p {
+            ParameterType::Int => Self::Int,
+            ParameterType::UInt => Self::UInt,
+            ParameterType::Long => Self::Long,
+            ParameterType::ULong => Self::ULong,
+            ParameterType::Float => Self::Float,
+            ParameterType::Double => Self::Double,
+            ParameterType::String => Self::String,
+            ParameterType::Bool => Self::Bool,
+            ParameterType::VecBytes => Self::VecBytes,
+        }
+    }
+}
+
+/// JSON mirror -> runtime parameter type (used when loading).
+impl From<ParameterTypeRepr> for ParameterType {
+    fn from(r: ParameterTypeRepr) -> Self {
+        match r {
+            ParameterTypeRepr::Int => Self::Int,
+            ParameterTypeRepr::UInt => Self::UInt,
+            ParameterTypeRepr::Long => Self::Long,
+            ParameterTypeRepr::ULong => Self::ULong,
+            ParameterTypeRepr::Float => Self::Float,
+            ParameterTypeRepr::Double => Self::Double,
+            ParameterTypeRepr::String => Self::String,
+            ParameterTypeRepr::Bool => Self::Bool,
+            ParameterTypeRepr::VecBytes => Self::VecBytes,
+        }
+    }
+}
+
+/// Runtime return type -> JSON mirror (used when saving).
+impl From<&ReturnType> for ReturnTypeRepr {
+    fn from(r: &ReturnType) -> Self {
+        match r {
+            ReturnType::Int => Self::Int,
+            ReturnType::UInt => Self::UInt,
+            ReturnType::Long => Self::Long,
+            ReturnType::ULong => Self::ULong,
+            ReturnType::Float => Self::Float,
+            ReturnType::Double => Self::Double,
+            ReturnType::String => Self::String,
+            ReturnType::Bool => Self::Bool,
+            ReturnType::Void => Self::Void,
+            ReturnType::VecBytes => Self::VecBytes,
+        }
+    }
+}
+
+/// JSON mirror -> runtime return type (used when loading).
+impl From<ReturnTypeRepr> for ReturnType {
+    fn from(r: ReturnTypeRepr) -> Self {
+        match r {
+            ReturnTypeRepr::Int => Self::Int,
+            ReturnTypeRepr::UInt => Self::UInt,
+            ReturnTypeRepr::Long => Self::Long,
+            ReturnTypeRepr::ULong => Self::ULong,
+            ReturnTypeRepr::Float => Self::Float,
+            ReturnTypeRepr::Double => Self::Double,
+            ReturnTypeRepr::String => Self::String,
+            ReturnTypeRepr::Bool => Self::Bool,
+            ReturnTypeRepr::Void => Self::Void,
+            ReturnTypeRepr::VecBytes => Self::VecBytes,
+        }
+    }
+}
+
+/// Captured x86_64 special registers for a paused vCPU. Round-trips
+/// to/from [`CommonSpecialRegisters`] and is restored verbatim when
+/// resuming a `Call` entrypoint.
+#[derive(Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub(super) struct Sregs {
+    cs: SegmentRegister,
+    ds: SegmentRegister,
+    es: SegmentRegister,
+    fs: SegmentRegister,
+    gs: SegmentRegister,
+    ss: SegmentRegister,
+    tr: SegmentRegister,
+    ldt: SegmentRegister,
+    gdt: TableRegister,
+    idt: TableRegister,
+    cr0: u64,
+    cr2: u64,
+    cr3: u64,
+    cr4: u64,
+    cr8: u64,
+    efer: u64,
+    apic_base: u64,
+    interrupt_bitmap: [u64; 4],
+}
+
+/// Serde mirror of [`CommonSegmentRegister`].
+#[derive(Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+struct SegmentRegister {
+    base: u64,
+    limit: u32,
+    selector: u16,
+    type_: u8,
+    present: u8,
+    dpl: u8,
+    db: u8,
+    s: u8,
+    l: u8,
+    g: u8,
+    avl: u8,
+    unusable: u8,
+    padding: u8,
+}
+
+/// Serde mirror of [`CommonTableRegister`].
+#[derive(Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+struct TableRegister {
+    base: u64,
+    limit: u16,
+}
+
+// --- Conversions between repr and runtime types ---------------------
+
+/// Runtime special registers -> serde mirror, field by field.
+impl From<&CommonSpecialRegisters> for Sregs {
+    fn from(s: &CommonSpecialRegisters) -> Self {
+        let seg = |r: &CommonSegmentRegister| SegmentRegister {
+            base: r.base,
+            limit: r.limit,
+            selector: r.selector,
+            type_: r.type_,
+            present: r.present,
+            dpl: r.dpl,
+            db: r.db,
+            s: r.s,
+            l: r.l,
+            g: r.g,
+            avl: r.avl,
+            unusable: r.unusable,
+            padding: r.padding,
+        };
+        let tab = |r: &CommonTableRegister| TableRegister {
+            base: r.base,
+            limit: r.limit,
+        };
+        Self {
+            cs: seg(&s.cs),
+            ds: seg(&s.ds),
+            es: seg(&s.es),
+            fs: seg(&s.fs),
+            gs: seg(&s.gs),
+            ss: seg(&s.ss),
+            tr: seg(&s.tr),
+            ldt: seg(&s.ldt),
+            gdt: tab(&s.gdt),
+            idt: tab(&s.idt),
+            cr0: s.cr0,
+            cr2: s.cr2,
+            cr3: s.cr3,
+            cr4: s.cr4,
+            cr8: s.cr8,
+            efer: s.efer,
+            apic_base: s.apic_base,
+            interrupt_bitmap: s.interrupt_bitmap,
+        }
+    }
+}
+
+/// Serde mirror -> runtime special registers, field by field.
+impl From<Sregs> for CommonSpecialRegisters {
+    fn from(r: Sregs) -> Self {
+        let seg = |s: SegmentRegister| CommonSegmentRegister {
+            base: s.base,
+            limit: s.limit,
+            selector: s.selector,
+            type_: s.type_,
+            present: s.present,
+            dpl: s.dpl,
+            db: s.db,
+            s: s.s,
+            l: s.l,
+            g: s.g,
+            avl: s.avl,
+            unusable: s.unusable,
+            padding: s.padding,
+        };
+        let tab = |t: TableRegister| CommonTableRegister {
+            base: t.base,
+            limit: t.limit,
+        };
+        Self {
+            cs: seg(r.cs),
+            ds: seg(r.ds),
+            es: seg(r.es),
+            fs: seg(r.fs),
+            gs: seg(r.gs),
+            ss: seg(r.ss),
+            tr: seg(r.tr),
+            ldt: seg(r.ldt),
+            gdt: tab(r.gdt),
+            idt: tab(r.idt),
+            cr0: r.cr0,
+            cr2: r.cr2,
+            cr3: r.cr3,
+            cr4: r.cr4,
+            cr8: r.cr8,
+            efer: r.efer,
+            apic_base: r.apic_base,
+            interrupt_bitmap: r.interrupt_bitmap,
+        }
+    }
+}
+
+/// Runtime host function definition -> serde mirror. A missing
+/// (`None`) parameter list is recorded as an empty list.
+impl From<&HostFunctionDefinition> for HostFunction {
+    fn from(d: &HostFunctionDefinition) -> Self {
+        let parameter_types = d
+            .parameter_types
+            .as_ref()
+            .map(|v| v.iter().map(ParameterTypeRepr::from).collect())
+            .unwrap_or_default();
+        Self {
+            function_name: d.function_name.clone(),
+            parameter_types,
+            return_type: ReturnTypeRepr::from(&d.return_type),
+        }
+    }
+}
+
+/// Serde mirror -> runtime host function definition. The parameter
+/// list always comes back as `Some(..)`, even when empty.
+impl From<HostFunction> for HostFunctionDefinition {
+    fn from(r: HostFunction) -> Self {
+        Self {
+            function_name: r.function_name,
+            parameter_types: Some(r.parameter_types.into_iter().map(Into::into).collect()),
+            return_type: r.return_type.into(),
+        }
+    }
+}
+
+impl OciSnapshotConfig {
+    /// Check that this config can be loaded by the current build and
+    /// host.
+    ///
+    /// Verifies, in order: architecture, ABI version, hypervisor,
+    /// `memory_size` range and page alignment, the
+    /// `snapshot_size + pt_size == memory_size` invariant,
+    /// `init_data_permissions` flag bits, and that the entrypoint
+    /// address lies inside the snapshot region. Returns the first
+    /// failure as an error; `Ok(())` means every check passed.
+    pub(super) fn validate_for_load(&self) -> crate::Result<()> {
+        if self.arch != Arch::current() {
+            return Err(crate::new_error!(
+                "snapshot architecture mismatch: file is {:?}, current host is {:?}",
+                self.arch,
+                Arch::current()
+            ));
+        }
+        if self.abi_version != SNAPSHOT_ABI_VERSION {
+            return Err(crate::new_error!(
+                "snapshot ABI version mismatch: file has version {}, this build expects {}. \
+                 The snapshot must be regenerated from the guest binary.",
+                self.abi_version,
+                SNAPSHOT_ABI_VERSION
+            ));
+        }
+        let current_hv = Hypervisor::current()
+            .ok_or_else(|| crate::new_error!("no hypervisor available to load snapshot"))?;
+        if self.hypervisor != current_hv {
+            return Err(crate::new_error!(
+                "snapshot hypervisor mismatch: file was created on {} but the current hypervisor is {}",
+                self.hypervisor.name(),
+                current_hv.name()
+            ));
+        }
+        // Bound memory size early so the subsequent file-size check
+        // does not have to deal with absurd values.
+        if self.memory_size == 0 || self.memory_size > SandboxMemoryLayout::MAX_MEMORY_SIZE as u64 {
+            return Err(crate::new_error!(
+                "snapshot memory_size ({}) is out of range",
+                self.memory_size
+            ));
+        }
+        if self.memory_size as usize % PAGE_SIZE != 0 {
+            return Err(crate::new_error!(
+                "snapshot memory_size ({}) is not a multiple of PAGE_SIZE",
+                self.memory_size
+            ));
+        }
+        // Invariant: `snapshot_size + pt_size == memory_size`.
+        // `snapshot_size` is the guest-visible prefix of the blob,
+        // mapped into guest PA space at `BASE_ADDRESS`. `pt_size`
+        // is the page-table tail that sits after it in the blob and
+        // the host mapping, outside the guest mapping of the
+        // snapshot region.
+        if self.layout.snapshot_size == 0 {
+            return Err(crate::new_error!("snapshot snapshot_size must be nonzero"));
+        }
+        if self.layout.snapshot_size % PAGE_SIZE != 0 {
+            return Err(crate::new_error!(
+                "snapshot snapshot_size ({}) is not a multiple of PAGE_SIZE",
+                self.layout.snapshot_size
+            ));
+        }
+        let pt = self.layout.pt_size.unwrap_or(0);
+        if pt % PAGE_SIZE != 0 {
+            return Err(crate::new_error!(
+                "snapshot pt_size ({}) is not a multiple of PAGE_SIZE",
+                pt
+            ));
+        }
+        if (self.layout.snapshot_size as u64).saturating_add(pt as u64) != self.memory_size {
+            return Err(crate::new_error!(
+                "snapshot snapshot_size ({}) + pt_size ({}) does not equal memory_size ({})",
+                self.layout.snapshot_size,
+                pt,
+                self.memory_size
+            ));
+        }
+        if let Some(bits) = self.layout.init_data_permissions {
+            MemoryRegionFlags::from_bits(bits).ok_or_else(|| {
+                crate::new_error!(
+                    "snapshot init_data_permissions {:#x} contains unknown flag bits",
+                    bits
+                )
+            })?;
+        }
+
+        // Entrypoint address must point inside the guest snapshot
+        // region. Hyperlight identity-maps the snapshot region in low
+        // GPAs, so the same bounds apply to virtual and physical
+        // addresses there. A crafted config could otherwise direct
+        // execution into unmapped GPA space (which only catches the
+        // bug at vCPU run time) or, worse, into the scratch region
+        // (which is writable). The bound here is
+        // `[BASE_ADDRESS, BASE_ADDRESS + snapshot_size)` because the
+        // snapshot blob covers exactly the snapshot region.
+        let snap_lo = SandboxMemoryLayout::BASE_ADDRESS as u64;
+        let snap_hi = snap_lo
+            .checked_add(self.layout.snapshot_size as u64)
+            .ok_or_else(|| {
+                crate::new_error!(
+                    "snapshot layout overflow: BASE_ADDRESS + snapshot_size ({}) does not fit in u64",
+                    self.layout.snapshot_size
+                )
+            })?;
+        let entry_addr = match &self.entrypoint {
+            Entrypoint::Initialise { addr } => *addr,
+            Entrypoint::Call { addr, .. } => *addr,
+        };
+        if entry_addr < snap_lo || entry_addr >= snap_hi {
+            return Err(crate::new_error!(
+                "snapshot entrypoint addr {:#x} is outside the snapshot region [{:#x}, {:#x})",
+                entry_addr,
+                snap_lo,
+                snap_hi
+            ));
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod schema_pin {
+    use super::*;
+
+    // The pinned documents below are compared byte-for-byte (after
+    // trimming the ends) with `serde_json::to_string_pretty` output,
+    // so they must keep its two-space indentation, with closing
+    // brackets at the indentation of the line that opened them.
+
+    const PINNED_INITIALISE: &str = r#"{
+  "hyperlight_version": "x.y.z",
+  "arch": "x86_64",
+  "abi_version": 1,
+  "hypervisor": "kvm",
+  "stack_top_gva": 3735928559,
+  "entrypoint": {
+    "kind": "initialise",
+    "addr": 4096
+  },
+  "layout": {
+    "input_data_size": 1,
+    "output_data_size": 2,
+    "heap_size": 3,
+    "code_size": 4,
+    "init_data_size": 5,
+    "init_data_permissions": 7,
+    "scratch_size": 8,
+    "snapshot_size": 9,
+    "pt_size": 10
+  },
+  "memory_size": 65536,
+  "host_functions": [
+    {
+      "function_name": "fn_int",
+      "parameter_types": [
+        "int",
+        "u_int",
+        "long",
+        "u_long",
+        "float",
+        "double",
+        "string",
+        "bool",
+        "vec_bytes"
+      ],
+      "return_type": "int"
+    },
+    {
+      "function_name": "fn_uint",
+      "parameter_types": [],
+      "return_type": "u_int"
+    },
+    {
+      "function_name": "fn_long",
+      "parameter_types": [],
+      "return_type": "long"
+    },
+    {
+      "function_name": "fn_ulong",
+      "parameter_types": [],
+      "return_type": "u_long"
+    },
+    {
+      "function_name": "fn_float",
+      "parameter_types": [],
+      "return_type": "float"
+    },
+    {
+      "function_name": "fn_double",
+      "parameter_types": [],
+      "return_type": "double"
+    },
+    {
+      "function_name": "fn_string",
+      "parameter_types": [],
+      "return_type": "string"
+    },
+    {
+      "function_name": "fn_bool",
+      "parameter_types": [],
+      "return_type": "bool"
+    },
+    {
+      "function_name": "fn_void",
+      "parameter_types": [],
+      "return_type": "void"
+    },
+    {
+      "function_name": "fn_vecbytes",
+      "parameter_types": [],
+      "return_type": "vec_bytes"
+    }
+  ],
+  "snapshot_generation": 42
+}"#;
+
+    const PINNED_CALL: &str = r#"{
+  "hyperlight_version": "x.y.z",
+  "arch": "x86_64",
+  "abi_version": 1,
+  "hypervisor": "mshv",
+  "stack_top_gva": 3735928559,
+  "entrypoint": {
+    "kind": "call",
+    "addr": 8192,
+    "sregs": {
+      "cs": {
+        "base": 1,
+        "limit": 2,
+        "selector": 3,
+        "type_": 4,
+        "present": 5,
+        "dpl": 6,
+        "db": 7,
+        "s": 8,
+        "l": 9,
+        "g": 10,
+        "avl": 11,
+        "unusable": 12,
+        "padding": 13
+      },
+      "ds": {
+        "base": 1,
+        "limit": 2,
+        "selector": 3,
+        "type_": 4,
+        "present": 5,
+        "dpl": 6,
+        "db": 7,
+        "s": 8,
+        "l": 9,
+        "g": 10,
+        "avl": 11,
+        "unusable": 12,
+        "padding": 13
+      },
+      "es": {
+        "base": 1,
+        "limit": 2,
+        "selector": 3,
+        "type_": 4,
+        "present": 5,
+        "dpl": 6,
+        "db": 7,
+        "s": 8,
+        "l": 9,
+        "g": 10,
+        "avl": 11,
+        "unusable": 12,
+        "padding": 13
+      },
+      "fs": {
+        "base": 1,
+        "limit": 2,
+        "selector": 3,
+        "type_": 4,
+        "present": 5,
+        "dpl": 6,
+        "db": 7,
+        "s": 8,
+        "l": 9,
+        "g": 10,
+        "avl": 11,
+        "unusable": 12,
+        "padding": 13
+      },
+      "gs": {
+        "base": 1,
+        "limit": 2,
+        "selector": 3,
+        "type_": 4,
+        "present": 5,
+        "dpl": 6,
+        "db": 7,
+        "s": 8,
+        "l": 9,
+        "g": 10,
+        "avl": 11,
+        "unusable": 12,
+        "padding": 13
+      },
+      "ss": {
+        "base": 1,
+        "limit": 2,
+        "selector": 3,
+        "type_": 4,
+        "present": 5,
+        "dpl": 6,
+        "db": 7,
+        "s": 8,
+        "l": 9,
+        "g": 10,
+        "avl": 11,
+        "unusable": 12,
+        "padding": 13
+      },
+      "tr": {
+        "base": 1,
+        "limit": 2,
+        "selector": 3,
+        "type_": 4,
+        "present": 5,
+        "dpl": 6,
+        "db": 7,
+        "s": 8,
+        "l": 9,
+        "g": 10,
+        "avl": 11,
+        "unusable": 12,
+        "padding": 13
+      },
+      "ldt": {
+        "base": 1,
+        "limit": 2,
+        "selector": 3,
+        "type_": 4,
+        "present": 5,
+        "dpl": 6,
+        "db": 7,
+        "s": 8,
+        "l": 9,
+        "g": 10,
+        "avl": 11,
+        "unusable": 12,
+        "padding": 13
+      },
+      "gdt": {
+        "base": 1,
+        "limit": 2
+      },
+      "idt": {
+        "base": 3,
+        "limit": 4
+      },
+      "cr0": 1,
+      "cr2": 2,
+      "cr3": 3,
+      "cr4": 4,
+      "cr8": 5,
+      "efer": 6,
+      "apic_base": 7,
+      "interrupt_bitmap": [
+        8,
+        9,
+        10,
+        11
+      ]
+    }
+  },
+  "layout": {
+    "input_data_size": 1,
+    "output_data_size": 2,
+    "heap_size": 3,
+    "code_size": 4,
+    "init_data_size": 5,
+    "init_data_permissions": null,
+    "scratch_size": 8,
+    "snapshot_size": 9,
+    "pt_size": null
+  },
+  "memory_size": 65536,
+  "host_functions": [
+    {
+      "function_name": "fn_void",
+      "parameter_types": [
+        "bool"
+      ],
+      "return_type": "void"
+    }
+  ],
+  "snapshot_generation": 42
+}"#;
+
+    const PINNED_ARCH: &str = r#"[
+  "x86_64",
+  "aarch64",
+  "i686"
+]"#;
+
+    const PINNED_HYPERVISOR: &str = r#"[
+  "kvm",
+  "mshv",
+  "whp"
+]"#;
+
+    /// Deserialize `pinned`, serialize it back with the pretty
+    /// printer, and require the text to be unchanged.
+    fn assert_round_trip(pinned: &str) {
+        let parsed: OciSnapshotConfig =
+            serde_json::from_str(pinned).expect("pinned JSON must deserialize");
+        let actual = serde_json::to_string_pretty(&parsed).expect("serialize");
+        assert_eq!(
+            actual.trim(),
+            pinned.trim(),
+            "Snapshot config JSON schema changed. If the change can break \
+             existing snapshots on disk, bump `MT_CONFIG_V1` in \
+             `super::media_types` and follow `docs/snapshot-versioning.md`. \
+             Either way, paste the actual output below into the matching \
+             `PINNED_*`.\n\nactual:\n{actual}"
+        );
+    }
+
+    #[test]
+    fn initialise_round_trip() {
+        assert_round_trip(PINNED_INITIALISE);
+    }
+
+    #[test]
+    fn call_round_trip() {
+        assert_round_trip(PINNED_CALL);
+    }
+
+    #[test]
+    fn arch_variants_round_trip() {
+        let parsed: Vec<Arch> =
+            serde_json::from_str(PINNED_ARCH).expect("pinned arch JSON must deserialize");
+        let actual = serde_json::to_string_pretty(&parsed).expect("serialize");
+        assert_eq!(actual.trim(), PINNED_ARCH.trim(), "Arch variants changed.");
+    }
+
+    #[test]
+    fn hypervisor_variants_round_trip() {
+        let parsed: Vec<Hypervisor> = serde_json::from_str(PINNED_HYPERVISOR)
+            .expect("pinned hypervisor JSON must deserialize");
+        let actual = serde_json::to_string_pretty(&parsed).expect("serialize");
+        assert_eq!(
+            actual.trim(),
+            PINNED_HYPERVISOR.trim(),
+            "Hypervisor variants changed."
+        );
+    }
+}
diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/digest.rs b/src/hyperlight_host/src/sandbox/snapshot/file/digest.rs
new file mode 100644
index 000000000..db70495a7
--- /dev/null
+++ b/src/hyperlight_host/src/sandbox/snapshot/file/digest.rs
@@ -0,0 +1,132 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+use std::io::{Read, Seek, SeekFrom};
+
+use oci_spec::image::Digest;
+use sha2::{Digest as _, Sha256};
+
+/// A `sha256:<hex>` digest as recorded in OCI manifests.
The bare hex
+/// (without prefix) is also the blob's filename inside `blobs/sha256/`.
+#[derive(Clone)]
+pub(super) struct Digest256 {
+    /// Lowercase hex of the 32-byte sha256 output.
+    pub(super) hex: String,
+}
+
+impl Digest256 {
+    /// Hash an in-memory byte slice.
+    pub(super) fn from_bytes(bytes: &[u8]) -> Self {
+        let arr: [u8; 32] = Sha256::digest(bytes).into();
+        Self {
+            hex: hex::encode(arr),
+        }
+    }
+
+    /// Finalize a streaming hasher that has already been fed its input.
+    fn from_hasher(h: Sha256) -> Self {
+        let arr: [u8; 32] = h.finalize().into();
+        Self {
+            hex: hex::encode(arr),
+        }
+    }
+}
+
+/// Build an `oci_spec::image::Digest` from a [`Digest256`].
+pub(super) fn oci_digest(d: &Digest256) -> crate::Result<Digest> {
+    Digest::try_from(format!("sha256:{}", d.hex))
+        .map_err(|e| crate::new_error!("failed to construct OCI digest: {}", e))
+}
+
+/// Parse a descriptor digest string of the form `sha256:<hex>` and
+/// return the bare hex part.
+///
+/// Errors when the `sha256:` prefix is missing or when the remainder
+/// is not exactly 64 lowercase hexadecimal characters.
+pub(super) fn parse_oci_digest(s: &str) -> crate::Result<String> {
+    let rest = s.strip_prefix("sha256:").ok_or_else(|| {
+        crate::new_error!(
+            "OCI descriptor digest {:?} is not a sha256 digest (only sha256 is supported)",
+            s
+        )
+    })?;
+    // OCI image-spec pins sha256 encoding to `[a-f0-9]{64}`. Reject
+    // uppercase hex up front so we stay byte-compatible with
+    // containerd, oras, crane, and the Docker registry.
+    let is_lower_hex = |b: u8| matches!(b, b'0'..=b'9' | b'a'..=b'f');
+    if rest.len() != 64 || !rest.bytes().all(is_lower_hex) {
+        return Err(crate::new_error!(
+            "OCI descriptor digest {:?} is not a 64-character lowercase hex string",
+            s
+        ));
+    }
+    Ok(rest.to_string())
+}
+
+/// Compute sha256 of `bytes` and verify it equals `expected_hex`.
+/// Used to validate manifest and config blobs (small, already in
+/// memory).
+pub(super) fn verify_blob_bytes(
+    label: &str,
+    bytes: &[u8],
+    expected_hex: &str,
+) -> crate::Result<()> {
+    ensure_digest_matches(label, &Digest256::from_bytes(bytes), expected_hex)
+}
+
+/// Stream-hash an already-open file and verify its sha256 equals
+/// `expected_hex`.
+///
+/// Takes the same `File` handle the caller will subsequently `mmap`,
+/// not a path. Hashing one open and mapping another is open-then-
+/// replace TOCTOU bait.
+///
+/// The file is rewound to offset 0 before hashing and again
+/// afterwards, so on return the position is the start of the file
+/// whether the digest matched or not. If a seek or read fails the
+/// error is returned immediately and the position is unspecified.
+pub(super) fn verify_blob_file(
+    label: &str,
+    file: &mut std::fs::File,
+    expected_hex: &str,
+) -> crate::Result<()> {
+    file.seek(SeekFrom::Start(0))
+        .map_err(|e| crate::new_error!("failed to seek {} blob: {}", label, e))?;
+    let mut hasher = Sha256::new();
+    // 64 KiB scratch buffer: the blob is hashed in chunks rather than
+    // read into memory in one piece.
+    let mut buf = [0u8; 64 * 1024];
+    loop {
+        let n = file
+            .read(&mut buf)
+            .map_err(|e| crate::new_error!("failed to read {} blob: {}", label, e))?;
+        if n == 0 {
+            break;
+        }
+        hasher.update(&buf[..n]);
+    }
+    file.seek(SeekFrom::Start(0))
+        .map_err(|e| crate::new_error!("failed to rewind {} blob: {}", label, e))?;
+    ensure_digest_matches(label, &Digest256::from_hasher(hasher), expected_hex)
+}
+
+/// Compare a computed digest with the hex a descriptor declared and
+/// build the shared mismatch error. `label` names the blob kind in
+/// the message.
+fn ensure_digest_matches(
+    label: &str,
+    actual: &Digest256,
+    expected_hex: &str,
+) -> crate::Result<()> {
+    if actual.hex != expected_hex {
+        return Err(crate::new_error!(
+            "{} blob digest mismatch: descriptor declares sha256:{}, file hashes to sha256:{}",
+            label,
+            expected_hex,
+            actual.hex
+        ));
+    }
+    Ok(())
+}
diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/fsutil.rs b/src/hyperlight_host/src/sandbox/snapshot/file/fsutil.rs
new file mode 100644
index 000000000..bd0bae63f
--- /dev/null
+++ b/src/hyperlight_host/src/sandbox/snapshot/file/fsutil.rs
@@ -0,0 +1,101 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+use std::io::Read;
+use std::path::Path;
+
+use super::digest::Digest256;
+
+/// Write `bytes` to `target` atomically: write to a sibling tmp
+/// file in the same directory, then `rename` it over `target`. No
+/// `fsync` is issued on the file or the directory.
+/// On rename failure the tmp file is removed.
+///
+/// The tmp name embeds the pid and a wall-clock nanosecond timestamp
+/// to keep concurrent writers in the same directory from colliding
+/// on the tmp path. Concurrent writers to the same `target` still
+/// race on the final rename; avoiding that is the caller's
+/// responsibility.
+pub(super) fn write_file_atomic(target: &Path, bytes: &[u8]) -> crate::Result<()> {
+    let parent = target.parent().unwrap_or(Path::new("."));
+    let file_name = target.file_name().and_then(|s| s.to_str()).ok_or_else(|| {
+        crate::new_error!("atomic write: target {:?} has no UTF-8 file name", target)
+    })?;
+    // A clock before the epoch degrades to a fixed suffix rather than
+    // failing the write; the pid still separates processes.
+    let nanos = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .map(|d| d.as_nanos())
+        .unwrap_or(0);
+    let tmp = parent.join(format!(
+        ".{}.tmp-{}-{}",
+        file_name,
+        std::process::id(),
+        nanos
+    ));
+    std::fs::write(&tmp, bytes).map_err(|e| {
+        // `fs::write` can fail after creating (and partly filling) the
+        // tmp file, e.g. on a full disk. Best-effort cleanup so failed
+        // writes do not accumulate stragglers next to `target`.
+        let _ = std::fs::remove_file(&tmp);
+        crate::new_error!("atomic write: failed to write tmp {:?}: {}", tmp, e)
+    })?;
+    std::fs::rename(&tmp, target).map_err(|e| {
+        // Best-effort cleanup; the rename error is what gets reported.
+        let _ = std::fs::remove_file(&tmp);
+        crate::new_error!(
+            "atomic write: failed to rename {:?} -> {:?}: {}",
+            tmp,
+            target,
+            e
+        )
+    })
+}
+
+/// Write a content-addressed blob into `blobs_dir`, skipping the
+/// write if a file at `blobs_dir/{digest.hex}` is already present and
+/// has the expected length. Skipping is safe because the filename is
+/// the sha256 of the bytes: a name match implies a content match
+/// outside of a hash collision. The size check defends against
+/// half-written stragglers left over from a previous crash.
+///
+/// Any `metadata` error (including "not found") falls through to a
+/// fresh atomic write.
+pub(super) fn write_blob_if_absent(
+    blobs_dir: &Path,
+    digest: &Digest256,
+    bytes: &[u8],
+) -> crate::Result<()> {
+    let target = blobs_dir.join(&digest.hex);
+    if let Ok(meta) = std::fs::metadata(&target)
+        && meta.is_file()
+        && meta.len() == bytes.len() as u64
+    {
+        return Ok(());
+    }
+    write_file_atomic(&target, bytes)
+}
+
+/// Read a file in full, refusing if the file is bigger than `max_size`.
+///
+/// The cap is enforced on the actual byte stream via [`Read::take`], so files
+/// whose `metadata().len()` is misleading cannot exceed the limit.
+pub(super) fn read_bounded(path: &Path, max_size: u64) -> crate::Result> { + let f = std::fs::File::open(path) + .map_err(|e| crate::new_error!("failed to open {:?}: {}", path, e))?; + let hint = f.metadata().map(|m| m.len().min(max_size)).unwrap_or(0); + let mut buf = Vec::with_capacity(hint as usize); + // Read one extra byte so we can distinguish "exactly at the limit" from + // "over the limit" instead of silently truncating an oversize file. + f.take(max_size.saturating_add(1)) + .read_to_end(&mut buf) + .map_err(|e| crate::new_error!("failed to read {:?}: {}", path, e))?; + if buf.len() as u64 > max_size { + return Err(crate::new_error!( + "file {:?} exceeds maximum allowed {} bytes", + path, + max_size + )); + } + Ok(buf) +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs b/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs new file mode 100644 index 000000000..3afbe6e35 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs @@ -0,0 +1,36 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Media types are versioned by suffix. The loader matches each +// version specifically (no `_CURRENT` shortcut on the read side); the +// writer always emits `_CURRENT`. A new version is added by: +// +// 1. Declare `MT_FOO_V2` next to `MT_FOO_V1`. +// 2. Point `MT_FOO_CURRENT` at `MT_FOO_V2`. +// 3. 
Add a dispatch arm in the loader that converts v1 -> v2 (or +// rejects v1 if no compatibility window is offered). +pub(in crate::sandbox::snapshot) const MT_CONFIG_V1: &str = + "application/vnd.hyperlight.snapshot.config.v1+json"; +pub(in crate::sandbox::snapshot) const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1; +pub(in crate::sandbox::snapshot) const MT_SNAPSHOT_V1: &str = + "application/vnd.hyperlight.snapshot.memory.v1"; +pub(in crate::sandbox::snapshot) const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1; + +/// ABI version for the snapshot memory blob. Bumped whenever the +/// host-guest contract for the bytes inside the snapshot blob changes +/// (PEB layout, calling convention, init state, etc.). Independent of +/// the config blob's media-type version. +pub(in crate::sandbox::snapshot) const SNAPSHOT_ABI_VERSION: u32 = 1; diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs new file mode 100644 index 000000000..737495665 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs @@ -0,0 +1,693 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! OCI Image Layout serde for [`Snapshot`]. See +//! `docs/snapshot-oci-format.md` for the on-disk format. 
+ +mod config; +mod digest; +mod fsutil; +mod media_types; + +use std::path::Path; + +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; +use hyperlight_common::vmem::PAGE_SIZE; +use oci_spec::image::{ + Descriptor, DescriptorBuilder, ImageIndex, ImageIndexBuilder, ImageManifest, + ImageManifestBuilder, MediaType, SCHEMA_VERSION, +}; + +use self::config::{ + Arch, Entrypoint, HostFunction, Hypervisor, MemoryLayout, OciSnapshotConfig, Sregs, +}; +use self::digest::{Digest256, oci_digest, parse_oci_digest, verify_blob_bytes, verify_blob_file}; +use self::fsutil::{read_bounded, write_blob_if_absent, write_file_atomic}; +pub(super) use self::media_types::{ + MT_CONFIG_CURRENT, MT_CONFIG_V1, MT_SNAPSHOT_CURRENT, MT_SNAPSHOT_V1, SNAPSHOT_ABI_VERSION, +}; +use super::{NextAction, Snapshot}; +use crate::hypervisor::regs::CommonSpecialRegisters; +use crate::mem::layout::SandboxMemoryLayout; +use crate::mem::memory_region::MemoryRegionFlags; +use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; + +pub(super) const OCI_LAYOUT_VERSION: &str = "1.0.0"; + +/// Maximum size of the config JSON blob. Bounds the allocation done +/// before we parse the JSON. +const MAX_CONFIG_BLOB_SIZE: u64 = 1024 * 1024; + +/// OCI standard annotation key for a manifest's tag inside an image +/// index. Set on the manifest descriptor in `index.json`, not on the +/// manifest blob itself. See the OCI Image Spec, "Annotations" and +/// the Image Layout spec. +const ANNOTATION_REF_NAME: &str = "org.opencontainers.image.ref.name"; + +/// Validate a tag against the OCI Distribution spec rules: +/// `[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`. Required so that the same +/// strings work both in our local layout and when pushed to a +/// registry via `oras` / `crane` / `skopeo`. 
+fn validate_tag(tag: &str) -> crate::Result<()> { + let bytes = tag.as_bytes(); + if bytes.is_empty() || bytes.len() > 128 { + return Err(crate::new_error!( + "tag {:?} is invalid: must be 1..=128 bytes", + tag + )); + } + let first = bytes[0]; + if !(first.is_ascii_alphanumeric() || first == b'_') { + return Err(crate::new_error!( + "tag {:?} is invalid: first character must be alphanumeric or '_'", + tag + )); + } + for &b in &bytes[1..] { + if !(b.is_ascii_alphanumeric() || b == b'_' || b == b'.' || b == b'-') { + return Err(crate::new_error!( + "tag {:?} is invalid: characters after the first must be \ + alphanumeric or one of '_', '.', '-'", + tag + )); + } + } + Ok(()) +} + +impl Snapshot { + /// Save this snapshot into the OCI Image Layout directory at + /// `path` under `tag`. + /// + /// `tag` is written to `index.json` as + /// `org.opencontainers.image.ref.name` and must satisfy the OCI + /// tag grammar (`[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`). + /// + /// The parent directory of `path` must already exist. `path` + /// itself is created if absent. If a layout already exists at + /// `path`, this call appends to it: other tags in `index.json` + /// are kept untouched, and a manifest descriptor whose + /// `org.opencontainers.image.ref.name` annotation equals `tag` + /// is replaced. Blobs are content-addressed and shared across + /// tags. See `docs/snapshot-oci-format.md` for the full on-disk + /// format and atomicity guarantees. + /// + /// A pre-existing `oci-layout` file must declare a supported + /// `imageLayoutVersion`. Otherwise the call errors without + /// touching the directory. + /// + /// # Portability + /// + /// Snapshot images are bound to a specific CPU architecture and + /// hypervisor. Both are recorded in the config blob and checked + /// at load time, with mismatches rejected with a clear error. + /// The hypervisor tag (kvm/mshv/whp) constrains the host OS. 
+    pub fn to_oci(&self, path: impl AsRef<Path>, tag: &str) -> crate::Result<()> {
+        let path = path.as_ref();
+        validate_tag(tag)?;
+
+        // The parent directory must already exist. `path` itself is
+        // created if absent. An existing non-directory at `path`
+        // makes `create_dir` report `AlreadyExists` and is then
+        // rejected by the `is_dir` check below.
+        match path.parent() {
+            Some(p) if !p.as_os_str().is_empty() => {
+                let parent_meta = std::fs::metadata(p).map_err(|e| {
+                    crate::new_error!("to_oci: parent directory {:?} not accessible: {}", p, e)
+                })?;
+                if !parent_meta.is_dir() {
+                    return Err(crate::new_error!(
+                        "to_oci: parent of {:?} is not a directory",
+                        path
+                    ));
+                }
+            }
+            // No parent component (bare relative name or root):
+            // nothing to pre-check, `create_dir` reports any problem.
+            _ => {}
+        }
+        match std::fs::create_dir(path) {
+            Ok(()) => {}
+            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
+                let meta = std::fs::metadata(path)
+                    .map_err(|e| crate::new_error!("to_oci: failed to stat {:?}: {}", path, e))?;
+                if !meta.is_dir() {
+                    return Err(crate::new_error!(
+                        "to_oci: {:?} exists and is not a directory",
+                        path
+                    ));
+                }
+            }
+            Err(e) => {
+                return Err(crate::new_error!(
+                    "to_oci: failed to create layout dir {:?}: {}",
+                    path,
+                    e
+                ));
+            }
+        }
+
+        // Validate any pre-existing `oci-layout` marker before
+        // touching anything else, so a foreign layout (future
+        // version, hand-edited file) is reported without altering
+        // the directory.
+        let layout_marker = path.join("oci-layout");
+        let marker_existed = layout_marker
+            .try_exists()
+            .map_err(|e| crate::new_error!("to_oci: failed to stat {:?}: {}", layout_marker, e))?;
+        if marker_existed {
+            let bytes = read_bounded(&layout_marker, MAX_CONFIG_BLOB_SIZE).map_err(|e| {
+                crate::new_error!("to_oci: failed to read existing oci-layout: {}", e)
+            })?;
+            let v: serde_json::Value = serde_json::from_slice(&bytes).map_err(|e| {
+                crate::new_error!("to_oci: existing oci-layout is not valid JSON: {}", e)
+            })?;
+            match v.get("imageLayoutVersion").and_then(|s| s.as_str()) {
+                Some(s) if s == OCI_LAYOUT_VERSION => {}
+                Some(other) => {
+                    return Err(crate::new_error!(
+                        "to_oci: existing imageLayoutVersion {:?} is unsupported (expected {:?})",
+                        other,
+                        OCI_LAYOUT_VERSION
+                    ));
+                }
+                None => {
+                    return Err(crate::new_error!(
+                        "to_oci: existing oci-layout is missing imageLayoutVersion"
+                    ));
+                }
+            }
+        }
+
+        // Start from the descriptors already in `index.json`, if any,
+        // so other tags survive this save.
+        let index_path = path.join("index.json");
+        let index_existed = index_path
+            .try_exists()
+            .map_err(|e| crate::new_error!("to_oci: failed to stat {:?}: {}", index_path, e))?;
+        let mut manifests: Vec<Descriptor> = if index_existed {
+            let bytes = read_bounded(&index_path, MAX_CONFIG_BLOB_SIZE).map_err(|e| {
+                crate::new_error!("to_oci: failed to read existing index.json: {}", e)
+            })?;
+            let existing: ImageIndex = serde_json::from_slice(&bytes).map_err(|e| {
+                crate::new_error!(
+                    "to_oci: existing index.json is not a valid OCI image index: {}",
+                    e
+                )
+            })?;
+            existing.manifests().to_vec()
+        } else {
+            Vec::new()
+        };
+
+        let new_desc = self.write_blobs_and_build_descriptor(path, tag)?;
+
+        // Replacement is by tag, not by digest: a new snapshot may
+        // hash to a different value but still claim the same logical
+        // ref. Blobs from the replaced manifest become orphans.
+        manifests.retain(|d| {
+            d.annotations()
+                .as_ref()
+                .and_then(|a| a.get(ANNOTATION_REF_NAME))
+                .map(|s| s.as_str() != tag)
+                .unwrap_or(true)
+        });
+        manifests.push(new_desc);
+
+        let index = ImageIndexBuilder::default()
+            .schema_version(SCHEMA_VERSION)
+            .media_type(MediaType::ImageIndex)
+            .manifests(manifests)
+            .build()
+            .map_err(|e| crate::new_error!("failed to build OCI index: {}", e))?;
+        let index_bytes = serde_json::to_vec_pretty(&index)
+            .map_err(|e| crate::new_error!("failed to serialise OCI index: {}", e))?;
+
+        // Write the marker before the index swap. A loader that sees
+        // the new index requires the marker; ordering them this way
+        // keeps the layout valid at every step.
+        if !marker_existed {
+            let layout_bytes = serde_json::to_vec(&serde_json::json!({
+                "imageLayoutVersion": OCI_LAYOUT_VERSION,
+            }))
+            .map_err(|e| crate::new_error!("failed to serialise oci-layout: {}", e))?;
+            write_file_atomic(&layout_marker, &layout_bytes)?;
+        }
+
+        // Index swap is the commit point.
+        write_file_atomic(&index_path, &index_bytes)?;
+
+        Ok(())
+    }
+
+    /// Write the snapshot, config, and manifest blobs under
+    /// `dir/blobs/sha256/` and return the manifest descriptor,
+    /// annotated with `tag`, that belongs in `index.json`.
+    ///
+    /// Does not touch `index.json` or `oci-layout`.
+    fn write_blobs_and_build_descriptor(
+        &self,
+        dir: &Path,
+        tag: &str,
+    ) -> crate::Result<Descriptor> {
+        let blobs_dir = dir.join("blobs").join("sha256");
+        std::fs::create_dir_all(&blobs_dir).map_err(|e| {
+            crate::new_error!("failed to create OCI blobs dir {:?}: {}", blobs_dir, e)
+        })?;
+
+        // Snapshot blob: the raw memory bytes.
+        let memory_bytes = self.memory.as_slice();
+        let memory_size = memory_bytes.len();
+        if memory_size == 0 || memory_size % PAGE_SIZE != 0 {
+            return Err(crate::new_error!(
+                "snapshot memory size {} must be a non-zero multiple of PAGE_SIZE",
+                memory_size
+            ));
+        }
+        let snapshot_digest = Digest256::from_bytes(memory_bytes);
+        write_blob_if_absent(&blobs_dir, &snapshot_digest, memory_bytes)?;
+
+        // Config blob.
+        let cfg = self.build_config()?;
+        let cfg_bytes = serde_json::to_vec_pretty(&cfg)
+            .map_err(|e| crate::new_error!("failed to serialise config JSON: {}", e))?;
+        let cfg_digest = Digest256::from_bytes(&cfg_bytes);
+        write_blob_if_absent(&blobs_dir, &cfg_digest, &cfg_bytes)?;
+
+        // Manifest blob.
+        let config_descriptor = DescriptorBuilder::default()
+            .media_type(MediaType::Other(MT_CONFIG_CURRENT.to_string()))
+            .digest(oci_digest(&cfg_digest)?)
+            .size(cfg_bytes.len() as u64)
+            .build()
+            .map_err(|e| crate::new_error!("failed to build config descriptor: {}", e))?;
+        let snapshot_descriptor = DescriptorBuilder::default()
+            .media_type(MediaType::Other(MT_SNAPSHOT_CURRENT.to_string()))
+            .digest(oci_digest(&snapshot_digest)?)
+            .size(memory_size as u64)
+            .build()
+            .map_err(|e| crate::new_error!("failed to build snapshot descriptor: {}", e))?;
+        // `artifactType` is set equal to `config.mediaType` per OCI
+        // image-spec "Guidelines for Artifact Usage". Registries
+        // surface this on the distribution-spec referrers API. Tools
+        // that read only `config.mediaType` see the same value.
+        let manifest = ImageManifestBuilder::default()
+            .schema_version(SCHEMA_VERSION)
+            .media_type(MediaType::ImageManifest)
+            .artifact_type(MediaType::Other(MT_CONFIG_CURRENT.to_string()))
+            .config(config_descriptor)
+            .layers(vec![snapshot_descriptor])
+            .build()
+            .map_err(|e| crate::new_error!("failed to build OCI manifest: {}", e))?;
+        let manifest_bytes = serde_json::to_vec_pretty(&manifest)
+            .map_err(|e| crate::new_error!("failed to serialise OCI manifest: {}", e))?;
+        let manifest_digest = Digest256::from_bytes(&manifest_bytes);
+        write_blob_if_absent(&blobs_dir, &manifest_digest, &manifest_bytes)?;
+
+        // The tag lives on the index-side descriptor, not inside the
+        // manifest blob.
+        let mut anns = std::collections::HashMap::new();
+        anns.insert(ANNOTATION_REF_NAME.to_string(), tag.to_string());
+        DescriptorBuilder::default()
+            .media_type(MediaType::ImageManifest)
+            .digest(oci_digest(&manifest_digest)?)
+            .size(manifest_bytes.len() as u64)
+            .annotations(anns)
+            .build()
+            .map_err(|e| crate::new_error!("failed to build manifest descriptor: {}", e))
+    }
+
+    /// Build the config-blob contents from this snapshot's in-memory
+    /// state.
+    ///
+    /// Errors if the entrypoint and `sregs` are inconsistent (an
+    /// `Initialise` entrypoint with sregs, or a `Call` entrypoint
+    /// without them), or if `Hypervisor::current()` returns `None`.
+    fn build_config(&self) -> crate::Result<OciSnapshotConfig> {
+        let entrypoint = match (self.entrypoint, self.sregs.as_ref()) {
+            (NextAction::Initialise(addr), None) => Entrypoint::Initialise { addr },
+            (NextAction::Call(addr), Some(sregs)) => Entrypoint::Call {
+                addr,
+                sregs: Box::new(Sregs::from(sregs)),
+            },
+            (NextAction::Initialise(_), Some(_)) => {
+                return Err(crate::new_error!(
+                    "snapshot inconsistent: Initialise entrypoint must not have sregs"
+                ));
+            }
+            (NextAction::Call(_), None) => {
+                return Err(crate::new_error!(
+                    "snapshot inconsistent: Call entrypoint must have sregs"
+                ));
+            }
+            #[cfg(test)]
+            (NextAction::None, _) => {
+                return Err(crate::new_error!(
+                    "snapshot with NextAction::None cannot be persisted"
+                ));
+            }
+        };
+
+        let host_functions = match &self.host_functions.host_functions {
+            Some(v) => v.iter().map(HostFunction::from).collect(),
+            None => Vec::new(),
+        };
+
+        let l = &self.layout;
+        Ok(OciSnapshotConfig {
+            hyperlight_version: env!("CARGO_PKG_VERSION").to_string(),
+            arch: Arch::current(),
+            abi_version: SNAPSHOT_ABI_VERSION,
+            hypervisor: Hypervisor::current()
+                .ok_or_else(|| crate::new_error!("no hypervisor available to tag snapshot"))?,
+            stack_top_gva: self.stack_top_gva,
+            entrypoint,
+            layout: MemoryLayout {
+                input_data_size: l.input_data_size,
+                output_data_size: l.output_data_size,
+                heap_size: l.heap_size,
+                code_size: l.code_size,
+                init_data_size: l.init_data_size,
+                init_data_permissions: l.init_data_permissions.map(|f| f.bits()),
+                scratch_size: l.get_scratch_size(),
+                snapshot_size: l.snapshot_size,
+                pt_size: l.pt_size,
+            },
+            memory_size: self.memory.mem_size() as u64,
+            host_functions,
+            snapshot_generation: self.snapshot_generation,
+        })
+    }
+
+    /// Load the snapshot tagged `tag` from an OCI Image Layout
+    /// directory at `path`.
+ /// + /// `tag` selects one manifest from `index.json` using + /// `org.opencontainers.image.ref.name`. Missing tags and duplicate + /// tags are rejected. + /// + /// This verifies sha256 for manifest, config, and snapshot blobs. + /// Use [`Snapshot::from_oci_unchecked`] to skip digest verification + /// in trusted paths. + /// + /// Returns an error for arch, hypervisor, and ABI mismatches. + /// + /// # File-mutation hazard + /// + /// Do not modify or replace files in `path` while the returned + /// `Snapshot` (or sandboxes built from it) is still alive. + pub fn from_oci(path: impl AsRef, tag: &str) -> crate::Result { + Self::from_oci_inner(path.as_ref(), tag, true) + } + + /// Like [`Snapshot::from_oci`] but **skips sha256 verification of + /// the manifest, config, and snapshot blob bytes**, trading + /// integrity checking for performance. All other validation + /// (OCI structure, descriptor sizes, schema versions, arch / + /// hypervisor / ABI tags, layout bounds, entrypoint bounds) is + /// unchanged. + pub fn from_oci_unchecked(path: impl AsRef, tag: &str) -> crate::Result { + Self::from_oci_inner(path.as_ref(), tag, false) + } + + fn from_oci_inner(path: &Path, tag: &str, verify_blobs: bool) -> crate::Result { + validate_tag(tag)?; + let meta = std::fs::metadata(path) + .map_err(|e| crate::new_error!("from_oci failed to stat {:?}: {}", path, e))?; + if !meta.is_dir() { + return Err(crate::new_error!( + "from_oci path {:?} is not a directory", + path + )); + } + + // 1. 
oci-layout + let layout_bytes = read_bounded(&path.join("oci-layout"), MAX_CONFIG_BLOB_SIZE) + .map_err(|e| crate::new_error!("failed to read oci-layout: {}", e))?; + let layout_json: serde_json::Value = serde_json::from_slice(&layout_bytes) + .map_err(|e| crate::new_error!("oci-layout is not valid JSON: {}", e))?; + let v = layout_json + .get("imageLayoutVersion") + .and_then(|v| v.as_str()) + .ok_or_else(|| crate::new_error!("oci-layout missing imageLayoutVersion field"))?; + if v != OCI_LAYOUT_VERSION { + return Err(crate::new_error!( + "unsupported OCI image layout version {:?} (expected {:?})", + v, + OCI_LAYOUT_VERSION + )); + } + + // 2. index.json -> manifest descriptor for `tag`. Multiple + // manifests are fine in OCI Image Layout; we select the + // one whose `org.opencontainers.image.ref.name` annotation + // matches the requested tag. Two manifests with the same + // tag is a malformed layout. + let index_bytes = read_bounded(&path.join("index.json"), MAX_CONFIG_BLOB_SIZE) + .map_err(|e| crate::new_error!("failed to read index.json: {}", e))?; + let index: ImageIndex = serde_json::from_slice(&index_bytes) + .map_err(|e| crate::new_error!("failed to parse index.json: {}", e))?; + let mut matching = index.manifests().iter().filter(|d| { + d.annotations() + .as_ref() + .and_then(|a| a.get(ANNOTATION_REF_NAME)) + .map(|s| s.as_str() == tag) + .unwrap_or(false) + }); + let manifest_desc = match (matching.next(), matching.next()) { + (None, _) => { + let known: Vec<&str> = index + .manifests() + .iter() + .filter_map(|d| { + d.annotations() + .as_ref() + .and_then(|a| a.get(ANNOTATION_REF_NAME)) + .map(|s| s.as_str()) + }) + .collect(); + return Err(crate::new_error!( + "no manifest tagged {:?} in OCI layout {:?}. 
Available tags: {:?}", + tag, + path, + known + )); + } + (Some(_), Some(_)) => { + return Err(crate::new_error!( + "OCI layout {:?} has multiple manifests tagged {:?}; tags must be unique", + path, + tag + )); + } + (Some(d), None) => d, + }; + // The manifest descriptor must advertise an OCI image + // manifest. Refuse anything else up front so we never try to + // parse, say, an image index or an arbitrary artifact blob + // as an `ImageManifest`. + if !matches!(manifest_desc.media_type(), MediaType::ImageManifest) { + return Err(crate::new_error!( + "manifest descriptor for tag {:?} has unexpected media type {:?} (expected {:?})", + tag, + manifest_desc.media_type().to_string(), + MediaType::ImageManifest.to_string() + )); + } + let manifest_hex = parse_oci_digest(manifest_desc.digest().as_ref())?; + + // 3. manifest blob + let manifest_path = path.join("blobs").join("sha256").join(&manifest_hex); + let manifest_bytes = read_bounded(&manifest_path, MAX_CONFIG_BLOB_SIZE)?; + if manifest_bytes.len() as u64 != manifest_desc.size() { + return Err(crate::new_error!( + "OCI manifest size mismatch: descriptor says {}, file is {}", + manifest_desc.size(), + manifest_bytes.len() + )); + } + if verify_blobs { + verify_blob_bytes("manifest", &manifest_bytes, &manifest_hex)?; + } + let manifest: ImageManifest = serde_json::from_slice(&manifest_bytes) + .map_err(|e| crate::new_error!("failed to parse OCI manifest JSON: {}", e))?; + if manifest.schema_version() != SCHEMA_VERSION { + return Err(crate::new_error!( + "unsupported OCI manifest schemaVersion {} (expected {})", + manifest.schema_version(), + SCHEMA_VERSION + )); + } + let cfg_desc = manifest.config(); + // Loader dispatch on config media type. A future v2 lands + // as a new arm that converts to the in-memory current shape. 
+ let cfg_media = cfg_desc.media_type().to_string(); + match cfg_media.as_str() { + MT_CONFIG_V1 => {} + other => { + return Err(crate::new_error!( + "unexpected config media type {:?} (supported: {:?})", + other, + MT_CONFIG_V1 + )); + } + } + // `artifactType` mirrors `config.mediaType` (manifest.md + // "Guidelines for Artifact Usage"). The OCI spec leaves this + // field OPTIONAL. A Hyperlight snapshot requires it to be + // present and equal to `config.mediaType` so loaders can + // distinguish a Hyperlight artifact from an arbitrary + // manifest that happens to share blob layout. + match manifest.artifact_type() { + Some(at) if at.to_string() == cfg_media => {} + Some(at) => { + return Err(crate::new_error!( + "OCI manifest artifactType {:?} does not match config media type {:?}", + at.to_string(), + cfg_media + )); + } + None => { + return Err(crate::new_error!( + "OCI manifest is missing required artifactType (expected {:?})", + cfg_media + )); + } + } + let layers = manifest.layers(); + if layers.len() != 1 { + return Err(crate::new_error!( + "expected exactly one OCI layer (the snapshot), found {}", + layers.len() + )); + } + let snap_desc = &layers[0]; + let snap_media = snap_desc.media_type().to_string(); + match snap_media.as_str() { + MT_SNAPSHOT_V1 => {} + other => { + return Err(crate::new_error!( + "unexpected snapshot layer media type {:?} (supported: {:?})", + other, + MT_SNAPSHOT_V1 + )); + } + } + + // 4. 
config blob + let cfg_hex = parse_oci_digest(cfg_desc.digest().as_ref())?; + let cfg_path = path.join("blobs").join("sha256").join(&cfg_hex); + let cfg_bytes = read_bounded(&cfg_path, MAX_CONFIG_BLOB_SIZE)?; + if cfg_bytes.len() as u64 != cfg_desc.size() { + return Err(crate::new_error!( + "config blob size mismatch: descriptor says {}, file is {}", + cfg_desc.size(), + cfg_bytes.len() + )); + } + if verify_blobs { + verify_blob_bytes("config", &cfg_bytes, &cfg_hex)?; + } + let cfg: OciSnapshotConfig = serde_json::from_slice(&cfg_bytes) + .map_err(|e| crate::new_error!("failed to parse Hyperlight config JSON: {}", e))?; + cfg.validate_for_load()?; + + // 5. snapshot blob: open once, hash and mmap the same + // handle so an attacker cannot swap the file between + // verification and mapping. + let snap_hex = parse_oci_digest(snap_desc.digest().as_ref())?; + let snap_path = path.join("blobs").join("sha256").join(&snap_hex); + let mut snap_file = std::fs::File::open(&snap_path).map_err(|e| { + crate::new_error!("failed to open snapshot blob {:?}: {}", snap_path, e) + })?; + let snap_file_len = snap_file + .metadata() + .map_err(|e| crate::new_error!("failed to stat snapshot blob: {}", e))? + .len(); + let expected_blob_len = cfg.memory_size; + if snap_file_len != expected_blob_len { + return Err(crate::new_error!( + "snapshot blob size mismatch: file is {} bytes, expected {} \ + (memory_size)", + snap_file_len, + expected_blob_len, + )); + } + if snap_file_len != snap_desc.size() { + return Err(crate::new_error!( + "snapshot blob size {} disagrees with OCI descriptor size {}", + snap_file_len, + snap_desc.size() + )); + } + if verify_blobs { + verify_blob_file("snapshot", &mut snap_file, &snap_hex)?; + } + + // 6. Reconstruct layout. 
+ let mut sbox_cfg = crate::sandbox::SandboxConfiguration::default(); + sbox_cfg.set_input_data_size(cfg.layout.input_data_size); + sbox_cfg.set_output_data_size(cfg.layout.output_data_size); + sbox_cfg.set_heap_size(cfg.layout.heap_size as u64); + sbox_cfg.set_scratch_size(cfg.layout.scratch_size); + let init_data_perms = match cfg.layout.init_data_permissions { + None => None, + Some(bits) => Some(MemoryRegionFlags::from_bits(bits).ok_or_else(|| { + crate::new_error!( + "snapshot init_data_permissions {:#x} contains unknown flag bits", + bits + ) + })?), + }; + let mut layout = SandboxMemoryLayout::new( + sbox_cfg, + cfg.layout.code_size, + cfg.layout.init_data_size, + init_data_perms, + )?; + // `snapshot_size` and `pt_size` are independent fields. + if let Some(pt) = cfg.layout.pt_size { + layout.set_pt_size(pt)?; + } + layout.set_snapshot_size(cfg.layout.snapshot_size); + + // 7. mmap the snapshot blob (file-backed CoW). The blob is + // the raw memory image. `ReadonlySharedMemory::from_file` + // surrounds it with host guard pages. The guest mapping + // of the snapshot region covers only the data prefix + // (`snapshot_size`). The PT tail sits past that prefix + // in the host mapping and is copied into the scratch + // region on restore. Keeping it out of the guest mapping + // of the snapshot region avoids overlap with + // `map_file_cow` regions installed immediately after the + // snapshot in guest PA space. + let memory = ReadonlySharedMemory::from_file(&snap_file, layout.snapshot_size)?; + + // 8. Build entrypoint + sregs back from the tagged enum. + let (entrypoint, sregs) = match cfg.entrypoint { + Entrypoint::Initialise { addr } => (NextAction::Initialise(addr), None), + Entrypoint::Call { addr, sregs } => ( + NextAction::Call(addr), + Some(CommonSpecialRegisters::from(*sregs)), + ), + }; + + // 9. Reconstitute host_functions metadata. 
+ let snapshot_generation = cfg.snapshot_generation; + let host_funcs_vec: Vec< + hyperlight_common::flatbuffer_wrappers::host_function_definition::HostFunctionDefinition, + > = cfg.host_functions.into_iter().map(Into::into).collect(); + let host_functions = if host_funcs_vec.is_empty() { + HostFunctionDetails { + host_functions: None, + } + } else { + HostFunctionDetails { + host_functions: Some(host_funcs_vec), + } + }; + + Ok(Snapshot { + layout, + memory, + regions: Vec::new(), + load_info: crate::mem::exe::LoadInfo::dummy(), + stack_top_gva: cfg.stack_top_gva, + sregs, + entrypoint, + snapshot_generation, + host_functions, + }) + } +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs new file mode 100644 index 000000000..63777e278 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs @@ -0,0 +1,2827 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Tests for the OCI Image Layout snapshot format (`super::file`). 
+
+#![cfg(test)]
+
+use std::sync::Arc;
+
+use hyperlight_testing::simple_guest_as_string;
+use serde_json::Value;
+use sha2::{Digest as _, Sha256};
+
+use crate::func::Registerable;
+use crate::sandbox::snapshot::Snapshot;
+use crate::{GuestBinary, HostFunctions, MultiUseSandbox, UninitializedSandbox};
+
+/// Build an evolved sandbox from the simple test guest with default
+/// configuration.
+fn create_test_sandbox() -> MultiUseSandbox {
+    let path = simple_guest_as_string().unwrap();
+    UninitializedSandbox::new(GuestBinary::FilePath(path), None)
+        .unwrap()
+        .evolve()
+        .unwrap()
+}
+
+/// Build a snapshot directly from the simple test guest binary via
+/// `Snapshot::from_env`, without creating a sandbox first.
+fn create_snapshot_from_binary() -> Snapshot {
+    let path = simple_guest_as_string().unwrap();
+    Snapshot::from_env(
+        GuestBinary::FilePath(path),
+        crate::sandbox::SandboxConfiguration::default(),
+    )
+    .unwrap()
+}
+
+/// `Result::unwrap_err` requires `T: Debug`, but `Snapshot` is not
+/// `Debug`. This wrapper is the test-side equivalent.
+#[track_caller]
+fn unwrap_err_snapshot(r: crate::Result<Snapshot>) -> crate::HyperlightError {
+    match r {
+        Err(e) => e,
+        Ok(_) => panic!("expected Snapshot::from_oci to fail"),
+    }
+}
+
+/// Locate the config blob referenced by the first manifest listed in
+/// `oci_dir/index.json`. Returns its full path. Used by tests that
+/// mutate the on-disk JSON.
+fn find_config_blob(oci_dir: &std::path::Path) -> std::path::PathBuf {
+    // index.json -> digest of the first manifest descriptor.
+    let index_bytes = std::fs::read(oci_dir.join("index.json")).unwrap();
+    let index: Value = serde_json::from_slice(&index_bytes).unwrap();
+    let manifest_digest = index["manifests"][0]["digest"]
+        .as_str()
+        .unwrap()
+        .strip_prefix("sha256:")
+        .unwrap();
+    // manifest blob -> digest of its config descriptor.
+    let manifest_path = oci_dir.join("blobs").join("sha256").join(manifest_digest);
+    let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap();
+    let cfg_digest = manifest["config"]["digest"]
+        .as_str()
+        .unwrap()
+        .strip_prefix("sha256:")
+        .unwrap();
+    oci_dir.join("blobs").join("sha256").join(cfg_digest)
+}
+
+// =============================================================================
+// In-memory `from_snapshot` round-trips (no file I/O).
+// ============================================================================= + +#[test] +fn from_snapshot_already_initialized_in_memory() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None).unwrap(); + let result: i32 = sbox2.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +#[test] +fn from_snapshot_in_memory_pre_init() { + let snap = create_snapshot_from_binary(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(snap), HostFunctions::default(), None).unwrap(); + let result: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +// ============================================================================= +// Round-trip via OCI layout on disk. +// ============================================================================= + +#[test] +fn round_trip_save_load_call() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let oci = dir.path().join("snap"); + snapshot.to_oci(&oci, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&oci, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + let result: String = sbox2.call("Echo", "hello\n".to_string()).unwrap(); + assert_eq!(result, "hello\n"); +} + +#[test] +fn snapshot_and_pt_size_round_trip() { + // Running-sandbox snapshot. 
+ let mut sbox = create_test_sandbox(); + let snap = sbox.snapshot().unwrap(); + let original_snapshot_size = snap.layout().snapshot_size; + let original_pt_size = snap.layout().pt_size; + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("running"); + snap.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + assert_eq!(loaded.layout().snapshot_size, original_snapshot_size); + assert_eq!(loaded.layout().pt_size, original_pt_size); + + // Pre-init snapshot. + let preinit = create_snapshot_from_binary(); + let preinit_snapshot_size = preinit.layout().snapshot_size; + let preinit_pt_size = preinit.layout().pt_size; + + let path = dir.path().join("preinit"); + preinit.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + assert_eq!(loaded.layout().snapshot_size, preinit_snapshot_size); + assert_eq!(loaded.layout().pt_size, preinit_pt_size); +} + +#[test] +fn snapshot_generation_round_trip() { + let mut sbox = create_test_sandbox(); + sbox.call::("Echo", "a".to_string()).unwrap(); + let snap1 = sbox.snapshot().unwrap(); + sbox.call::("Echo", "b".to_string()).unwrap(); + sbox.call::("Echo", "c".to_string()).unwrap(); + let snap3 = sbox.snapshot().unwrap(); + let gen1 = snap1.snapshot_generation(); + let gen3 = snap3.snapshot_generation(); + assert_ne!(gen1, gen3); + + let dir = tempfile::tempdir().unwrap(); + let p1 = dir.path().join("s1"); + let p3 = dir.path().join("s3"); + snap1.to_oci(&p1, "latest").unwrap(); + snap3.to_oci(&p3, "latest").unwrap(); + + let loaded1 = Snapshot::from_oci(&p1, "latest").unwrap(); + let loaded3 = Snapshot::from_oci(&p3, "latest").unwrap(); + assert_eq!(loaded1.snapshot_generation(), gen1); + assert_eq!(loaded3.snapshot_generation(), gen3); +} + +#[test] +fn pre_init_snapshot_save_load() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("preinit"); + 
snap.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); +} + +// ============================================================================= +// Restore semantics (id/generation gating). +// ============================================================================= + +#[test] +fn restore_from_loaded_snapshot() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Arc::new(Snapshot::from_oci(&path, "latest").unwrap()); + let mut sbox2 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + sbox2.call::("AddToStatic", 5i32).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 5); + + sbox2.restore(loaded).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn restore_across_independent_oci_loads_succeeds() { + // Compatibility between a sandbox and a snapshot is structural + // (memory layout plus host-function set). Two independent + // `from_oci` loads of the same image produce structurally + // identical snapshots, so a sandbox built from one accepts a + // restore from the other. 
+ let mut sbox = create_test_sandbox(); + let snap1 = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let p1 = dir.path().join("snap1"); + snap1.to_oci(&p1, "latest").unwrap(); + let p2 = dir.path().join("snap2"); + snap1.to_oci(&p2, "latest").unwrap(); + + let loaded1 = Arc::new(Snapshot::from_oci(&p1, "latest").unwrap()); + let loaded2 = Arc::new(Snapshot::from_oci(&p2, "latest").unwrap()); + + let mut sbox = MultiUseSandbox::from_snapshot(loaded2, HostFunctions::default(), None).unwrap(); + sbox.restore(loaded1).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn many_sandboxes_share_single_arc_snapshot() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Arc::new(Snapshot::from_oci(&path, "latest").unwrap()); + let mut sandboxes = Vec::new(); + for _ in 0..4 { + sandboxes.push( + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(), + ); + } + for sbox in sandboxes.iter_mut() { + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + } +} + +#[test] +fn concurrent_sandboxes_from_same_oci() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let path = std::sync::Arc::new(path); + let mut handles = Vec::new(); + for _ in 0..4 { + let p = path.clone(); + handles.push(std::thread::spawn(move || { + let loaded = Snapshot::from_oci(p.as_ref(), "latest").unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + })); + } + for h in handles { + h.join().unwrap(); + } +} + +#[test] +fn cow_does_not_mutate_backing_file() { + let 
mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Hash every blob file to verify nothing changes after a CoW write + // through the loaded sandbox. + let blobs_dir = path.join("blobs").join("sha256"); + let snapshot_before: std::collections::BTreeMap<_, _> = std::fs::read_dir(&blobs_dir) + .unwrap() + .map(|e| { + let e = e.unwrap(); + let bytes = std::fs::read(e.path()).unwrap(); + (e.file_name(), bytes) + }) + .collect(); + + { + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .unwrap(); + sbox.call::("AddToStatic", 99).unwrap(); + } + + let snapshot_after: std::collections::BTreeMap<_, _> = std::fs::read_dir(&blobs_dir) + .unwrap() + .map(|e| { + let e = e.unwrap(); + let bytes = std::fs::read(e.path()).unwrap(); + (e.file_name(), bytes) + }) + .collect(); + assert_eq!( + snapshot_before, snapshot_after, + "CoW writes must not mutate any blob in the OCI layout" + ); +} + +// ============================================================================= +// Architecture / hypervisor / ABI gating. +// ============================================================================= + +/// Compute sha256 of `bytes` and return the lowercase hex digest. +fn sha256_hex(bytes: &[u8]) -> String { + let arr: [u8; 32] = Sha256::digest(bytes).into(); + hex::encode(arr) +} + +fn rewrite_config(oci_dir: &std::path::Path, mutate: F) { + // Mutate the config blob and rewrite the manifest + index so the + // OCI layout stays self-consistent: blob filenames, descriptor + // sizes, and descriptor sha256 digests all match the current + // bytes on disk. 
The point of these helpers is to exercise + // field-level validators (arch, abi_version, hypervisor, etc.), + // not the digest layer; tests that want to probe the digest + // layer write raw bytes directly. + let cfg_path = find_config_blob(oci_dir); + let mut cfg: Value = serde_json::from_slice(&std::fs::read(&cfg_path).unwrap()).unwrap(); + mutate(&mut cfg); + let new_cfg_bytes = serde_json::to_vec_pretty(&cfg).unwrap(); + let new_cfg_hex = sha256_hex(&new_cfg_bytes); + let blobs_dir = oci_dir.join("blobs").join("sha256"); + let new_cfg_path = blobs_dir.join(&new_cfg_hex); + std::fs::write(&new_cfg_path, &new_cfg_bytes).unwrap(); + if new_cfg_path != cfg_path { + std::fs::remove_file(&cfg_path).ok(); + } + + let mp = manifest_path(oci_dir); + let mut manifest: Value = serde_json::from_slice(&std::fs::read(&mp).unwrap()).unwrap(); + manifest["config"]["digest"] = Value::from(format!("sha256:{}", new_cfg_hex)); + manifest["config"]["size"] = Value::from(new_cfg_bytes.len() as u64); + let new_manifest_bytes = serde_json::to_vec_pretty(&manifest).unwrap(); + let new_manifest_hex = sha256_hex(&new_manifest_bytes); + let new_manifest_path = blobs_dir.join(&new_manifest_hex); + std::fs::write(&new_manifest_path, &new_manifest_bytes).unwrap(); + if new_manifest_path != mp { + std::fs::remove_file(&mp).ok(); + } + + let index_path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&index_path).unwrap()).unwrap(); + index["manifests"][0]["digest"] = Value::from(format!("sha256:{}", new_manifest_hex)); + index["manifests"][0]["size"] = Value::from(new_manifest_bytes.len() as u64); + std::fs::write(index_path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +/// Locate the manifest blob path inside `oci_dir`. 
+fn manifest_path(oci_dir: &std::path::Path) -> std::path::PathBuf { + let index: Value = + serde_json::from_slice(&std::fs::read(oci_dir.join("index.json")).unwrap()).unwrap(); + let digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + oci_dir.join("blobs").join("sha256").join(digest) +} + +/// Mutate the on-disk manifest JSON. Updates the index's manifest +/// descriptor `size` and `digest` to match the new manifest bytes +/// so the test exercises the field-level validator we care about, +/// not the digest layer. +fn rewrite_manifest(oci_dir: &std::path::Path, mutate: F) { + let mp = manifest_path(oci_dir); + let mut manifest: Value = serde_json::from_slice(&std::fs::read(&mp).unwrap()).unwrap(); + mutate(&mut manifest); + let new_bytes = serde_json::to_vec_pretty(&manifest).unwrap(); + let new_hex = sha256_hex(&new_bytes); + let blobs_dir = oci_dir.join("blobs").join("sha256"); + let new_path = blobs_dir.join(&new_hex); + std::fs::write(&new_path, &new_bytes).unwrap(); + if new_path != mp { + std::fs::remove_file(&mp).ok(); + } + + let index_path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&index_path).unwrap()).unwrap(); + index["manifests"][0]["digest"] = Value::from(format!("sha256:{}", new_hex)); + index["manifests"][0]["size"] = Value::from(new_bytes.len() as u64); + std::fs::write(index_path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +/// Mutate the on-disk index JSON in place. The index is the root of +/// the OCI layout and is not itself referenced by any digest, so +/// nothing further needs to be updated. 
+fn rewrite_index(oci_dir: &std::path::Path, mutate: F) { + let path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&path).unwrap()).unwrap(); + mutate(&mut index); + std::fs::write(path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +#[test] +fn arch_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_config(&path, |cfg| { + cfg["arch"] = Value::from("aarch64"); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("architecture") || msg.contains("arch"), + "expected architecture mismatch, got: {}", + msg + ); +} + +#[test] +fn abi_version_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_config(&path, |cfg| { + cfg["abi_version"] = Value::from(9999u32); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("ABI") || msg.contains("abi"), + "expected ABI version mismatch, got: {}", + msg + ); +} + +#[test] +fn hypervisor_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Pick a hypervisor that is not the current one. 
+ let current = cfg_current_hypervisor(); + let other = if current == "kvm" { "mshv" } else { "kvm" }; + + rewrite_config(&path, |cfg| { + cfg["hypervisor"] = Value::from(other); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("hypervisor"), + "expected hypervisor mismatch, got: {}", + msg + ); +} + +fn cfg_current_hypervisor() -> &'static str { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("probe"); + create_snapshot_from_binary() + .to_oci(&path, "latest") + .unwrap(); + let cfg_path = find_config_blob(&path); + let cfg: Value = serde_json::from_slice(&std::fs::read(&cfg_path).unwrap()).unwrap(); + match cfg["hypervisor"].as_str().unwrap() { + "kvm" => "kvm", + "mshv" => "mshv", + "whp" => "whp", + other => panic!("unknown hypervisor tag {other}"), + } +} + +// ============================================================================= +// Entrypoint vs sregs invariants enforced by serde shape. +// ============================================================================= + +#[test] +fn call_snapshot_without_sregs_rejected() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Strip sregs from the entrypoint variant. serde must reject the + // missing field at parse time. 
+ rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + assert_eq!(entry["kind"].as_str().unwrap(), "call"); + entry.remove("sregs"); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("sregs") || msg.contains("missing field") || msg.contains("config"), + "expected serde error about missing sregs, got: {}", + msg + ); +} + +#[test] +fn initialise_snapshot_with_sregs_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Add a bogus sregs field to the Initialise variant. serde must + // reject the unknown field (variant has deny_unknown_fields). + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + assert_eq!(entry["kind"].as_str().unwrap(), "initialise"); + entry.insert("sregs".to_string(), Value::from("{}")); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("sregs") || msg.contains("unknown field") || msg.contains("config"), + "expected serde error about unknown field sregs, got: {}", + msg + ); +} + +// ============================================================================= +// Host functions validation. +// +// `validate_host_functions` enforces a superset relation: every host +// function registered when the snapshot was taken must be present in +// the loaded sandbox's `HostFunctions` with a matching signature. +// Extras in the loaded set are allowed. +// ============================================================================= + +/// Build a `MultiUseSandbox` with the default host functions plus a +/// custom `Add(i32, i32) -> i32`. Used to seed the snapshot side of +/// the host-function validation tests so the snapshot has a +/// non-default required function. 
+fn create_sandbox_with_custom_host_funcs() -> MultiUseSandbox { + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + u.evolve().unwrap() +} + +/// `HostFunctions::default()` plus a matching `Add(i32, i32) -> i32`. +fn host_funcs_with_matching_add() -> HostFunctions { + let mut hf = HostFunctions::default(); + hf.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + hf +} + +#[test] +fn from_snapshot_accepts_matching_host_functions() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), host_funcs_with_matching_add(), None) + .unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn from_snapshot_rejects_missing_host_function() { + // Snapshot was taken with `Add` registered. Loading with the + // default `HostFunctions` (no `Add`) must be rejected. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let err = MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .expect_err("from_snapshot must reject a HostFunctions set missing `Add`"); + let msg = format!("{}", err); + assert!( + msg.contains("missing") && msg.contains("Add"), + "expected missing-host-function error mentioning Add, got: {}", + msg + ); +} + +#[test] +fn from_snapshot_rejects_signature_mismatch() { + // Snapshot has `Add(i32, i32) -> i32`. 
Load registers an `Add` + // with a different signature. validate_host_functions must + // refuse the mismatch. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = HostFunctions::default(); + hf.register_host_function("Add", |a: String, b: String| Ok(format!("{a}{b}"))) + .unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let err = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None) + .expect_err("from_snapshot must reject a signature mismatch on Add"); + let msg = format!("{}", err); + assert!( + msg.contains("signature mismatches") && msg.contains("Add"), + "expected signature-mismatch error mentioning Add, got: {}", + msg + ); +} + +#[test] +fn from_snapshot_accepts_extra_host_functions() { + // Snapshot has `Add`. Load registers `Add` (matching) plus an + // unrelated `Mul`. Extras are allowed. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = host_funcs_with_matching_add(); + hf.register_host_function("Mul", |a: i32, b: i32| Ok(a * b)) + .unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn from_snapshot_accepts_zero_arg_host_function() { + // A zero-arg host function must round-trip through OCI. 
+ let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Zero", || Ok(7i64)).unwrap(); + let mut sbox = u.evolve().unwrap(); + + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = HostFunctions::default(); + hf.register_host_function("Zero", || Ok(7i64)).unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let _sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None) + .expect("zero-arg host function must round-trip through OCI"); +} + +#[test] +fn from_snapshot_has_default_host_print() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + let _ = sbox2.call::("PrintTwoArgs", ("hi".to_string(), 42i32)); +} + +// ============================================================================= +// OCI-shape invariants. 
+// ============================================================================= + +#[test] +fn missing_oci_layout_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::remove_file(path.join("oci-layout")).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("oci-layout"), + "expected missing oci-layout error, got: {}", + msg + ); +} + +#[test] +fn wrong_image_layout_version_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::write( + path.join("oci-layout"), + r#"{"imageLayoutVersion":"99.0.0"}"#, + ) + .unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("image layout version") || msg.contains("imageLayoutVersion"), + "expected layout version error, got: {}", + msg + ); +} + +#[test] +fn missing_index_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::remove_file(path.join("index.json")).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("index.json"), + "expected missing index.json error, got: {}", + msg + ); +} + +#[test] +fn snapshot_blob_size_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Truncate the snapshot blob by one byte. 
+ let blobs_dir = path.join("blobs").join("sha256"); + let manifest_bytes = std::fs::read(path.join("index.json")).unwrap(); + let index: Value = serde_json::from_slice(&manifest_bytes).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let manifest_path = blobs_dir.join(manifest_digest); + let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let snap_path = blobs_dir.join(snap_digest); + let bytes = std::fs::read(&snap_path).unwrap(); + std::fs::write(&snap_path, &bytes[..bytes.len() - 1]).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("size") || msg.contains("mismatch"), + "expected size mismatch error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_zero_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + cfg["layout"]["snapshot_size"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("snapshot_size"), + "expected snapshot_size error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_unaligned_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + let s = cfg["layout"]["snapshot_size"].as_u64().unwrap(); + cfg["layout"]["snapshot_size"] = Value::from(s + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); 
+ assert!( + msg.contains("PAGE_SIZE") || msg.contains("multiple"), + "expected page alignment error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_must_match_memory_size() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + let page = hyperlight_common::vmem::PAGE_SIZE as u64; + rewrite_config(&path, |cfg| { + let m = cfg["memory_size"].as_u64().unwrap(); + cfg["layout"]["snapshot_size"] = Value::from(m + page); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("does not equal memory_size"), + "expected snapshot_size + pt_size != memory_size error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_pt_size_unaligned_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + if let Some(p) = cfg["layout"]["pt_size"].as_u64() { + cfg["layout"]["pt_size"] = Value::from(p + 1); + } else { + cfg["layout"]["pt_size"] = Value::from(1u64); + } + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("pt_size") || msg.contains("PAGE_SIZE") || msg.contains("multiple"), + "expected pt_size validation error, got: {}", + msg + ); +} + +#[test] +fn missing_snapshot_blob_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let blobs_dir = path.join("blobs").join("sha256"); + let manifest_bytes = std::fs::read(path.join("index.json")).unwrap(); + let index: Value = serde_json::from_slice(&manifest_bytes).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() 
+ .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let manifest_path = blobs_dir.join(manifest_digest); + let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + std::fs::remove_file(blobs_dir.join(snap_digest)).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("snapshot blob") || msg.contains("No such") || msg.contains("not found"), + "expected missing-blob error, got: {}", + msg + ); +} + +// ============================================================================= +// Path semantics. +// ============================================================================= + +#[test] +fn from_oci_nonexistent_path_returns_error() { + let err = unwrap_err_snapshot(Snapshot::from_oci("/nonexistent/path/to/oci", "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("stat") || msg.contains("No such") || msg.contains("not found"), + "expected missing-path error, got: {}", + msg + ); +} + +#[test] +fn from_oci_file_not_directory_rejected() { + let dir = tempfile::tempdir().unwrap(); + let file_path = dir.path().join("not-a-dir"); + std::fs::write(&file_path, b"hello").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&file_path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("not a directory"), + "expected not-a-directory error, got: {}", + msg + ); +} + +#[test] +fn to_oci_appends_into_existing_layout_with_new_tag() { + // Two snapshots written to the same directory under different + // tags coexist. Both load back independently. The shared + // `oci-layout` marker and `blobs/sha256/` are reused. 
+ let snap_a = create_snapshot_from_binary(); + let snap_b = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + + snap_a.to_oci(&path, "a").unwrap(); + snap_b.to_oci(&path, "b").unwrap(); + + let _ = Snapshot::from_oci(&path, "a").unwrap(); + let _ = Snapshot::from_oci(&path, "b").unwrap(); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let tags: Vec<&str> = index["manifests"] + .as_array() + .unwrap() + .iter() + .map(|m| { + m["annotations"]["org.opencontainers.image.ref.name"] + .as_str() + .unwrap() + }) + .collect(); + assert_eq!(tags.len(), 2); + assert!(tags.contains(&"a")); + assert!(tags.contains(&"b")); +} + +#[test] +fn to_oci_replaces_descriptor_for_same_tag() { + // Writing the same tag twice replaces the manifest descriptor + // for that tag. The loader sees the second snapshot, not the + // first. The index ends up with exactly one entry for the tag. 
+ let mut sbox = create_test_sandbox(); + sbox.call::("Echo", "first".to_string()).unwrap(); + let snap_first = sbox.snapshot().unwrap(); + sbox.call::("Echo", "second".to_string()).unwrap(); + let snap_second = sbox.snapshot().unwrap(); + let gen_second = snap_second.snapshot_generation(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + + snap_first.to_oci(&path, "latest").unwrap(); + snap_second.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + assert_eq!(loaded.snapshot_generation(), gen_second); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let entries: Vec<&Value> = index["manifests"] + .as_array() + .unwrap() + .iter() + .filter(|m| { + m["annotations"]["org.opencontainers.image.ref.name"].as_str() == Some("latest") + }) + .collect(); + assert_eq!(entries.len(), 1, "expected one descriptor for tag 'latest'"); +} + +#[test] +fn to_oci_requires_parent_dir_to_exist() { + // The leaf directory at `path` is created, but the parent + // chain must already exist. A missing ancestor errors and the + // filesystem is left untouched. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let missing_parent = dir.path().join("a").join("b").join("c"); + let path = missing_parent.join("store"); + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("parent directory") || msg.contains("not accessible"), + "expected missing-parent error, got: {msg}" + ); + assert!(!missing_parent.exists(), "no parent dirs should be created"); +} + +#[test] +fn to_oci_creates_leaf_directory() { + // The leaf at `path` is created when missing, as long as the + // parent exists. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + snap.to_oci(&path, "latest").unwrap(); + let _ = Snapshot::from_oci(&path, "latest").unwrap(); +} + +#[test] +fn to_oci_rejects_regular_file_at_path() { + // A regular file at `path` cannot be turned into a directory. + // The call errors and the file is left intact. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("not-a-dir"); + std::fs::write(&path, b"i am a file").unwrap(); + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("is not a directory") || msg.contains("layout dir"), + "expected non-directory error, got: {msg}" + ); + assert_eq!(std::fs::read(&path).unwrap(), b"i am a file"); +} + +#[test] +fn to_oci_rejects_unsupported_existing_layout_version() { + // A pre-existing `oci-layout` with an unknown version is left + // alone and the call errors. Defends against silently rewriting + // a future layout we do not understand. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + std::fs::write( + path.join("oci-layout"), + br#"{"imageLayoutVersion":"99.0.0"}"#, + ) + .unwrap(); + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("imageLayoutVersion") || msg.contains("unsupported"), + "expected unsupported-version error, got: {msg}" + ); + assert!( + !path.join("index.json").exists(), + "to_oci must not have written index.json" + ); +} + +#[test] +fn to_oci_invalid_tag_does_not_touch_filesystem() { + // Tag grammar is checked before any filesystem mutation. An + // empty tag is rejected without creating the layout directory. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + let _ = snap.to_oci(&path, "").unwrap_err(); + assert!(!path.exists(), "target path must not be created on error"); + let leftovers: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(|e| e.ok()) + .map(|e| e.file_name()) + .collect(); + assert!( + leftovers.is_empty(), + "unexpected leftover entries in parent: {:?}", + leftovers + ); +} + +#[test] +fn to_oci_into_empty_existing_directory() { + // An empty pre-existing directory is treated as a fresh layout + // location. The marker, index, and blobs are all written. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + + snap.to_oci(&path, "latest").unwrap(); + let _ = Snapshot::from_oci(&path, "latest").unwrap(); + assert!(path.join("oci-layout").exists()); + assert!(path.join("index.json").exists()); +} + +#[test] +fn to_oci_preserves_unrelated_files_in_layout_dir() { + // Files inside the layout dir that are not part of the OCI + // structure are left alone. Mirrors the behaviour of every + // merging tool surveyed (containers/image, crane, regclient). + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + std::fs::write(path.join("README.md"), b"keep me").unwrap(); + + snap.to_oci(&path, "latest").unwrap(); + assert_eq!(std::fs::read(path.join("README.md")).unwrap(), b"keep me"); +} + +#[test] +fn to_oci_same_tag_same_content_is_idempotent() { + // Saving the same snapshot under the same tag twice ends up + // with one descriptor for the tag and the same blob count as + // after the first save. Blobs are content-addressed so the + // second write reuses them. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + + snap.to_oci(&path, "latest").unwrap(); + let blobs_after_first: Vec<_> = std::fs::read_dir(path.join("blobs").join("sha256")) + .unwrap() + .filter_map(|e| e.ok().map(|e| e.file_name())) + .collect(); + + snap.to_oci(&path, "latest").unwrap(); + let blobs_after_second: Vec<_> = std::fs::read_dir(path.join("blobs").join("sha256")) + .unwrap() + .filter_map(|e| e.ok().map(|e| e.file_name())) + .collect(); + assert_eq!(blobs_after_first.len(), blobs_after_second.len()); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let manifests = index["manifests"].as_array().unwrap(); + assert_eq!(manifests.len(), 1); + assert_eq!( + manifests[0]["annotations"]["org.opencontainers.image.ref.name"], + "latest" + ); +} + +#[test] +fn to_oci_shares_blobs_across_tags_with_identical_content() { + // Two tags written from the same in-memory snapshot share all + // three blobs (manifest, config, snapshot). The blob directory + // therefore holds exactly three files even with two tags. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + + snap.to_oci(&path, "a").unwrap(); + snap.to_oci(&path, "b").unwrap(); + + let blobs: Vec<_> = std::fs::read_dir(path.join("blobs").join("sha256")) + .unwrap() + .filter_map(|e| e.ok().map(|e| e.file_name())) + .collect(); + assert_eq!(blobs.len(), 3, "expected 3 deduped blobs, got {:?}", blobs); +} + +#[test] +fn to_oci_replace_in_middle_preserves_other_tags() { + // Replacing one tag in a layout with three tags keeps the + // other two descriptors intact and produces a fresh descriptor + // for the replaced tag. 
+ let mut sbox = create_test_sandbox(); + let snap_a = sbox.snapshot().unwrap(); + sbox.call::("Echo", "x".to_string()).unwrap(); + let snap_b = sbox.snapshot().unwrap(); + sbox.call::("Echo", "y".to_string()).unwrap(); + let snap_c = sbox.snapshot().unwrap(); + sbox.call::("Echo", "z".to_string()).unwrap(); + let snap_b2 = sbox.snapshot().unwrap(); + let gen_b2 = snap_b2.snapshot_generation(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + snap_a.to_oci(&path, "a").unwrap(); + snap_b.to_oci(&path, "b").unwrap(); + snap_c.to_oci(&path, "c").unwrap(); + snap_b2.to_oci(&path, "b").unwrap(); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let tags: Vec<&str> = index["manifests"] + .as_array() + .unwrap() + .iter() + .map(|m| { + m["annotations"]["org.opencontainers.image.ref.name"] + .as_str() + .unwrap() + }) + .collect(); + assert_eq!(tags.len(), 3); + assert!(tags.contains(&"a")); + assert!(tags.contains(&"b")); + assert!(tags.contains(&"c")); + + let loaded_b = Snapshot::from_oci(&path, "b").unwrap(); + assert_eq!(loaded_b.snapshot_generation(), gen_b2); +} + +#[test] +fn to_oci_rejects_malformed_existing_oci_layout_json() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + std::fs::write(path.join("oci-layout"), b"not json").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("oci-layout") && msg.contains("JSON"), + "expected oci-layout JSON error, got: {msg}" + ); + assert!(!path.join("index.json").exists()); +} + +#[test] +fn to_oci_rejects_existing_oci_layout_missing_version() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + 
std::fs::write(path.join("oci-layout"), br#"{"other":"field"}"#).unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("imageLayoutVersion"), + "expected missing-version error, got: {msg}" + ); + assert!(!path.join("index.json").exists()); +} + +#[test] +fn to_oci_rejects_malformed_existing_index_json() { + // An existing `oci-layout` with a supported version plus a + // corrupt `index.json` is rejected. We do not silently discard + // someone else's index. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + std::fs::write( + path.join("oci-layout"), + br#"{"imageLayoutVersion":"1.0.0"}"#, + ) + .unwrap(); + std::fs::write(path.join("index.json"), b"{not valid json").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("index.json"), + "expected index.json error, got: {msg}" + ); + assert_eq!( + std::fs::read(path.join("index.json")).unwrap(), + b"{not valid json", + "to_oci must not overwrite a malformed existing index.json" + ); +} + +/// Asserts the integrity contract: a snapshot blob whose bytes have +/// been replaced (without changing length, so descriptor sizes still +/// match) must be rejected on load via digest mismatch. +#[test] +fn from_oci_rejects_snapshot_blob_byte_mutation() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Locate the snapshot blob via the manifest, then flip one byte + // somewhere in the middle. Length is preserved so all descriptor + // size checks still pass. Only a digest re-hash can detect this. 
+ let blobs_dir = path.join("blobs").join("sha256"); + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + let manifest: Value = + serde_json::from_slice(&std::fs::read(blobs_dir.join(&manifest_digest)).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + let snap_path = blobs_dir.join(&snap_digest); + let mut bytes = std::fs::read(&snap_path).unwrap(); + let mid = bytes.len() / 2; + bytes[mid] ^= 0xFF; + std::fs::write(&snap_path, &bytes).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("hash") || msg.contains("sha256"), + "expected digest-mismatch error, got: {}", + msg + ); +} + +/// Same idea as `from_oci_rejects_snapshot_blob_byte_mutation`, but +/// targeting the config blob. A config-blob mutation that preserves +/// the descriptor size and the structural fields the loader +/// validates today (e.g. flipping a byte inside the host-function +/// flatbuffer payload) must be caught by digest verification. +#[test] +fn from_oci_rejects_config_blob_byte_mutation() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let cfg_path = find_config_blob(&path); + let mut bytes = std::fs::read(&cfg_path).unwrap(); + // Replace the first ASCII brace `{` with a different byte that + // keeps the file the same length but yields a different sha256. + // This will also break JSON parsing, but the point is to assert + // the digest layer rejects it before the parser ever runs. 
+ bytes[0] = b' '; + std::fs::write(&cfg_path, &bytes).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("hash") || msg.contains("sha256"), + "expected digest-mismatch error, got: {}", + msg + ); +} + +#[test] +fn from_oci_observes_per_path_contents() { + // `to_oci` no longer permits overwriting, so verifying that two + // independent saves have independent contents is done by writing + // each snapshot to its own path and asserting the loaded + // contents differ. + let mut sbox = create_test_sandbox(); + sbox.call::("AddToStatic", 11i32).unwrap(); + let snap_x = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let path_x = dir.path().join("snap_x"); + snap_x.to_oci(&path_x, "latest").unwrap(); + + let loaded_x = Snapshot::from_oci(&path_x, "latest").unwrap(); + let mut sbox_x = + MultiUseSandbox::from_snapshot(Arc::new(loaded_x), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_x.call::("GetStatic", ()).unwrap(), 11); + + sbox.call::("AddToStatic", 44i32).unwrap(); + let snap_y = sbox.snapshot().unwrap(); + let path_y = dir.path().join("snap_y"); + snap_y.to_oci(&path_y, "latest").unwrap(); + + let loaded_y = Snapshot::from_oci(&path_y, "latest").unwrap(); + let mut sbox_y = + MultiUseSandbox::from_snapshot(Arc::new(loaded_y), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_y.call::("GetStatic", ()).unwrap(), 55); +} + +// ============================================================================= +// Exhaustive input-validation tests for `from_oci`. +// +// Every load-side error path in `super::file::from_oci` should be +// exercised here. 
+// ============================================================================= + +fn save_for_mutation() -> (tempfile::TempDir, std::path::PathBuf) { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + (dir, path) +} + +fn assert_err_contains(err: crate::HyperlightError, needle: &str) { + let msg = format!("{}", err); + assert!( + msg.contains(needle), + "expected error to contain {:?}, got: {}", + needle, + msg + ); +} + +#[test] +fn malformed_oci_layout_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("oci-layout"), b"not-valid-json{").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "oci-layout"); +} + +#[test] +fn oci_layout_missing_version_field_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("oci-layout"), r#"{"unrelated":"field"}"#).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "imageLayoutVersion"); +} + +#[test] +fn malformed_index_json_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("index.json"), b"{not json").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "index.json"); +} + +#[test] +fn empty_index_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + idx["manifests"] = Value::Array(Vec::new()); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "no manifest tagged"); +} + +#[test] +fn from_oci_rejects_duplicate_tag_in_index() { + // A valid OCI layout has unique tags. Two manifests sharing the + // same `org.opencontainers.image.ref.name` annotation is + // malformed and from_oci must refuse rather than silently + // pick one. 
+ let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + let first = idx["manifests"][0].clone(); + idx["manifests"].as_array_mut().unwrap().push(first); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "multiple manifests tagged"); +} + +#[test] +fn missing_manifest_blob_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::remove_file(manifest_path(&path)).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("open") || msg.contains("No such") || msg.contains("not found"), + "expected missing-manifest error, got: {}", + msg + ); +} + +#[test] +fn bad_digest_format_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + // Strip the algorithm prefix entirely. `oci-spec` validates + // descriptor digests on parse, so the index parser rejects + // this before our own digest helper sees it. + idx["manifests"][0]["digest"] = Value::from("deadbeef"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("index.json"), + "expected digest or parse error, got: {}", + msg + ); +} + +#[test] +fn malformed_manifest_json_rejected() { + // Probes the manifest JSON parser. Under `from_oci`, the + // digest-verification step would fire first and short-circuit + // this; that path is covered by + // `from_oci_rejects_manifest_blob_byte_mutation`. Use + // `from_oci_unchecked` here to reach the parser. + let (_dir, path) = save_for_mutation(); + let mp = manifest_path(&path); + std::fs::write(&mp, b"{not json").unwrap(); + // Update index size to match so we hit the JSON parser, not the + // size check. 
+ let new_len = std::fs::metadata(&mp).unwrap().len(); + rewrite_index(&path, |idx| { + idx["manifests"][0]["size"] = Value::from(new_len); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "manifest"); +} + +#[test] +fn wrong_manifest_schema_version_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["schemaVersion"] = Value::from(99u32); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "schemaVersion"); +} + +#[test] +fn unknown_config_media_type_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["config"]["mediaType"] = Value::from("application/vnd.example.unknown.v1+json"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "config media type"); +} + +#[test] +fn empty_layers_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["layers"] = Value::Array(Vec::new()); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "layer"); +} + +#[test] +fn extra_layers_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + let first = m["layers"][0].clone(); + m["layers"].as_array_mut().unwrap().push(first); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "layer"); +} + +#[test] +fn unknown_snapshot_layer_media_type_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["layers"][0]["mediaType"] = Value::from("application/vnd.example.unknown.v1"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "snapshot layer media type"); +} + +/// Manifest- and index-level annotations injected by third-party +/// tools (cosign, ORAS, build pipelines, etc.) must NOT break load. 
+/// `OciSnapshotConfig` is intentionally strict (`deny_unknown_fields`) but +/// the OCI envelope around it is parsed via `oci-spec`'s lenient +/// types. +#[test] +fn manifest_and_index_annotations_tolerated() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_manifest(&path, |m| { + let mut anns = serde_json::Map::new(); + anns.insert( + "org.opencontainers.image.created".to_string(), + Value::from("2024-01-01T00:00:00Z"), + ); + anns.insert( + "dev.sigstore.cosign/signature".to_string(), + Value::from("MEUCIQDsignature"), + ); + m["annotations"] = Value::Object(anns); + }); + rewrite_index(&path, |idx| { + let mut anns = serde_json::Map::new(); + anns.insert( + "org.opencontainers.image.ref.name".to_string(), + Value::from("v1.2.3"), + ); + idx["annotations"] = Value::Object(anns); + }); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn config_blob_size_descriptor_mismatch_rejected() { + let (_dir, path) = save_for_mutation(); + // Bump the config descriptor's claimed size by one without + // touching the actual blob. + rewrite_manifest(&path, |m| { + let sz = m["config"]["size"].as_u64().unwrap(); + m["config"]["size"] = Value::from(sz + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "config blob size mismatch"); +} + +#[test] +fn malformed_config_json_rejected() { + // Probes the config JSON parser. Under `from_oci` the + // digest-verification step would fire first; that path is + // covered by `from_oci_rejects_config_blob_byte_mutation`. + // Use `from_oci_unchecked` here to reach the parser. 
+ let (_dir, path) = save_for_mutation(); + let cfg_path = find_config_blob(&path); + std::fs::write(&cfg_path, b"{not json").unwrap(); + // Update both the manifest's config descriptor size and the + // index's manifest descriptor size to match so we reach the + // JSON parser, not the size check. + let new_cfg_len = std::fs::metadata(&cfg_path).unwrap().len(); + rewrite_manifest(&path, |m| { + m["config"]["size"] = Value::from(new_cfg_len); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "config JSON"); +} + +#[test] +fn memory_size_zero_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + cfg["memory_size"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "memory_size"); +} + +#[test] +fn memory_size_unaligned_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let sz = cfg["memory_size"].as_u64().unwrap(); + cfg["memory_size"] = Value::from(sz + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + // Either the page-alignment check or the file-size check trips. + // Both are valid signals that the value was rejected. + assert!( + msg.contains("memory_size") || msg.contains("PAGE_SIZE") || msg.contains("size"), + "expected memory_size rejection, got: {}", + msg + ); +} + +#[test] +fn bad_init_data_permissions_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + // 1u32 << 31 is well outside the defined READ|WRITE|EXECUTE bits. + cfg["layout"]["init_data_permissions"] = Value::from(0x8000_0000u32); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "init_data_permissions"); +} + +#[test] +fn entrypoint_addr_outside_snapshot_region_rejected() { + // A crafted config can claim any u64 as the entry point. 
The + // loader must refuse addresses that don't lie within + // [BASE_ADDRESS, BASE_ADDRESS + snapshot_size) so a malicious + // image can't direct execution into unmapped GPA space or into + // the writable scratch region. + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + // 0xDEAD_BEEF_0000 is far above any plausible snapshot + // region (snapshot_size is bounded by MAX_MEMORY_SIZE, + // ~16 GiB) and outside guest mapped memory. + entry["addr"] = Value::from(0xDEAD_BEEF_0000u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "entrypoint addr"); +} + +#[test] +fn entrypoint_addr_below_base_address_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + // 0 is below BASE_ADDRESS (0x1000); rejected as "outside the + // snapshot region". + entry["addr"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "entrypoint addr"); +} + +// ============================================================================= +// `from_oci_unchecked`: skips blob digest verification but still runs +// every other validator (OCI structure, descriptor sizes, schema +// versions, arch / hypervisor / ABI tags, layout bounds, entrypoint +// bounds). 
// =============================================================================

#[test]
fn from_oci_unchecked_round_trips() {
    // Persist a live sandbox's snapshot, reload it through the
    // unchecked loader, and show the restored sandbox still answers
    // a guest call.
    let mut source_sandbox = create_test_sandbox();
    let saved = source_sandbox.snapshot().unwrap();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("snap");
    saved.to_oci(&layout, "latest").unwrap();

    let reloaded = Arc::new(Snapshot::from_oci_unchecked(&layout, "latest").unwrap());
    let mut restored =
        MultiUseSandbox::from_snapshot(reloaded, HostFunctions::default(), None).unwrap();
    let echoed = restored.call::<String>("Echo", "hi\n".to_string()).unwrap();
    assert_eq!(echoed, "hi\n");
}

#[test]
fn from_oci_unchecked_still_validates_config_fields() {
    // Skipping digest verification must not skip the per-field
    // validators (arch, abi, hypervisor, layout bounds, entrypoint
    // bounds). The config is edited through `rewrite_config` so the
    // layout stays self-consistent; otherwise the checked path would
    // also trip on the descriptor-size check before the field
    // validator gets a chance to run.
    let (_guard, layout) = save_for_mutation();
    rewrite_config(&layout, |config| {
        config["arch"] = Value::from("aarch64");
    });
    let failure = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&layout, "latest"));
    let text = format!("{failure}");
    let mentions_arch = text.contains("architecture") || text.contains("arch");
    assert!(
        mentions_arch,
        "expected architecture mismatch under from_oci_unchecked, got: {}",
        text
    );
}

#[test]
fn from_oci_rejects_manifest_blob_byte_mutation() {
    // Corrupt one byte of the manifest body while leaving the index
    // descriptor digest untouched. The checked loader has to notice
    // through digest verification, ahead of any field-level manifest
    // validation (schema version, media type, etc.).
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("snap");
    saved.to_oci(&layout, "latest").unwrap();

    let manifest_file = manifest_path(&layout);
    let mut raw = std::fs::read(&manifest_file).unwrap();
    // Toggling a bit in the first byte keeps the length identical,
    // so the descriptor size check still passes and only the digest
    // comparison can catch it. The JSON is broken too, but the digest
    // check runs first.
    raw[0] ^= 0x20;
    std::fs::write(&manifest_file, &raw).unwrap();

    assert_err_contains(
        unwrap_err_snapshot(Snapshot::from_oci(&layout, "latest")),
        "digest mismatch",
    );
}

#[test]
fn from_oci_unknown_tag_lists_available_tags() {
    // Asking for a tag that is not in the layout must name the
    // requested tag in the error and mention the tag that does exist.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "alpha").unwrap();

    let failure = unwrap_err_snapshot(Snapshot::from_oci(&layout, "missing"));
    let text = format!("{failure}");
    let names_requested_tag = text.contains("no manifest tagged") && text.contains("\"missing\"");
    assert!(
        names_requested_tag,
        "expected unknown-tag error mentioning the requested tag, got: {}",
        text
    );
    let lists_existing_tag = text.contains("alpha");
    assert!(
        lists_existing_tag,
        "expected available-tags listing to include the actual tag, got: {}",
        text
    );
}

#[test]
fn manifest_descriptor_carries_ref_name_annotation() {
    // External tools (`oras`, `crane manifest`, `skopeo inspect`)
    // discover the tag through the standard OCI ref.name annotation
    // on the manifest descriptor inside `index.json`, so it must be
    // written on save.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "production-v3").unwrap();

    let raw_index = std::fs::read(layout.join("index.json")).unwrap();
    let index: Value = serde_json::from_slice(&raw_index).unwrap();
    let descriptor = &index["manifests"][0];
    let ref_name = descriptor["annotations"]["org.opencontainers.image.ref.name"]
        .as_str()
        .unwrap();
    assert_eq!(ref_name, "production-v3");
}

// =============================================================================
// Tag validation.
// =============================================================================

#[test]
fn empty_tag_rejected_on_save() {
    // Saving under the empty tag fails with a tag-related error.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let failure = saved.to_oci(tmp.path().join("snap"), "").unwrap_err();
    assert!(failure.to_string().contains("tag"));
}

#[test]
fn empty_tag_rejected_on_load() {
    // Loading with the empty tag fails with a tag-related error even
    // though the layout itself is valid.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("snap");
    saved.to_oci(&layout, "latest").unwrap();
    let failure = unwrap_err_snapshot(Snapshot::from_oci(&layout, ""));
    assert!(failure.to_string().contains("tag"));
}

#[test]
fn tag_with_illegal_leading_char_rejected() {
    // Tags starting with `.` or `-` are refused on save.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    for bad_tag in [".dotleader", "-dashleader"] {
        let failure = saved.to_oci(tmp.path().join("snap"), bad_tag).unwrap_err();
        assert!(failure.to_string().contains("tag"));
    }
}

#[test]
fn tag_with_illegal_chars_rejected() {
    // Tags containing `/` or a space are refused on save.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    for bad_tag in ["with/slash", "with space"] {
        let failure = saved.to_oci(tmp.path().join("snap"), bad_tag).unwrap_err();
        assert!(failure.to_string().contains("tag"));
    }
}

#[test]
fn long_tag_within_limit_accepted() {
    // A 128-character tag saves and loads.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let long_tag: String = "a".repeat(128);
    saved.to_oci(tmp.path().join("snap"), &long_tag).unwrap();
    let _ = Snapshot::from_oci(tmp.path().join("snap"), &long_tag).unwrap();
}

#[test]
fn over_long_tag_rejected() {
    // A 129-character tag is refused on save.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let too_long: String = "a".repeat(129);
    let failure = saved.to_oci(tmp.path().join("snap"), &too_long).unwrap_err();
    assert!(failure.to_string().contains("tag"));
}

// =============================================================================
// Save-shape invariants. Verify the on-disk JSON we hand to standard
// OCI tools matches what the spec prescribes.
// =============================================================================

#[test]
fn manifest_descriptor_uses_image_manifest_media_type() {
    // The descriptor written to `index.json` advertises the OCI image
    // manifest media type.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "latest").unwrap();
    let raw_index = std::fs::read(layout.join("index.json")).unwrap();
    let index: Value = serde_json::from_slice(&raw_index).unwrap();
    let media_type = index["manifests"][0]["mediaType"].as_str().unwrap();
    assert_eq!(media_type, "application/vnd.oci.image.manifest.v1+json");
}

#[test]
fn manifest_descriptor_non_image_manifest_rejected() {
    // A descriptor that does not claim to be an OCI image manifest
    // must not be followed, even when the blob behind it would parse.
    // This guards against an image index or unrelated artifact having
    // been published under our tag.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "latest").unwrap();
    rewrite_index(&layout, |index| {
        index["manifests"][0]["mediaType"] = Value::from("application/vnd.oci.image.index.v1+json");
    });
    let failure = unwrap_err_snapshot(Snapshot::from_oci(&layout, "latest"));
    let text = format!("{failure}");
    let flagged = text.contains("unexpected media type");
    assert!(
        flagged,
        "expected manifest-descriptor media type error, got: {}",
        text
    );
}

#[test]
fn manifest_uses_correct_config_and_layer_media_types() {
    // The saved manifest carries the hyperlight config media type,
    // exactly one layer with the hyperlight memory media type, and a
    // matching `artifactType`.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "latest").unwrap();
    let raw_manifest = std::fs::read(manifest_path(&layout)).unwrap();
    let manifest: Value = serde_json::from_slice(&raw_manifest).unwrap();

    let config_type = manifest["config"]["mediaType"].as_str().unwrap();
    assert_eq!(
        config_type,
        "application/vnd.hyperlight.snapshot.config.v1+json"
    );

    let layers = manifest["layers"].as_array().unwrap();
    assert_eq!(layers.len(), 1);
    let layer_type = manifest["layers"][0]["mediaType"].as_str().unwrap();
    assert_eq!(layer_type, "application/vnd.hyperlight.snapshot.memory.v1");

    // `artifactType` repeats `config.mediaType`: registries exposing
    // the distribution-spec referrers API then report a useful type,
    // and older tooling that falls back to `config.mediaType` sees the
    // same value.
    let artifact_type = manifest["artifactType"].as_str().unwrap();
    assert_eq!(
        artifact_type,
        "application/vnd.hyperlight.snapshot.config.v1+json"
    );
}

#[test]
fn manifest_missing_artifact_type_rejected() {
    // Removing `artifactType` from the manifest fails the load.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "latest").unwrap();
    rewrite_manifest(&layout, |manifest| {
        manifest.as_object_mut().unwrap().remove("artifactType");
    });
    assert_err_contains(
        unwrap_err_snapshot(Snapshot::from_oci(&layout, "latest")),
        "missing required artifactType",
    );
}

#[test]
fn manifest_mismatched_artifact_type_rejected() {
    // An `artifactType` that differs from the config media type fails
    // the load.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "latest").unwrap();
    rewrite_manifest(&layout, |manifest| {
        manifest["artifactType"] = Value::from("application/vnd.example.bogus.v1+json");
    });
    assert_err_contains(
        unwrap_err_snapshot(Snapshot::from_oci(&layout, "latest")),
        "does not match config media type",
    );
}

#[test]
fn save_writes_oci_layout_marker() {
    // The `oci-layout` marker written on save declares version 1.0.0.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "latest").unwrap();
    let raw_marker = std::fs::read(layout.join("oci-layout")).unwrap();
    let marker: Value = serde_json::from_slice(&raw_marker).unwrap();
    let version = marker["imageLayoutVersion"].as_str().unwrap();
    assert_eq!(version, "1.0.0");
}

// =============================================================================
// Tag selection edge cases.
// =============================================================================

#[test]
fn tag_lookup_is_case_sensitive() {
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    saved.to_oci(&layout, "MyTag").unwrap();

    // A lower-cased spelling must not resolve.
    assert_err_contains(
        unwrap_err_snapshot(Snapshot::from_oci(&layout, "mytag")),
        "no manifest tagged",
    );

    // The exact spelling does.
    let _ = Snapshot::from_oci(&layout, "MyTag").unwrap();
}

#[test]
fn ref_name_annotation_key_is_case_sensitive() {
    // When the index spells the annotation key with different casing
    // (e.g. `org.OpenContainers.image.ref.name`), the manifest counts
    // as untagged and from_oci must not load it under any name.
    let (_guard, layout) = save_for_mutation();
    rewrite_index(&layout, |index| {
        let annotations = index["manifests"][0]["annotations"]
            .as_object_mut()
            .unwrap();
        let tag_value = annotations
            .remove("org.opencontainers.image.ref.name")
            .unwrap();
        annotations.insert("org.OpenContainers.image.ref.name".to_string(), tag_value);
    });
    assert_err_contains(
        unwrap_err_snapshot(Snapshot::from_oci(&layout, "latest")),
        "no manifest tagged",
    );
}

#[test]
fn tag_with_all_valid_special_chars_accepted() {
    // A tag mixing `.`, `-` and `_` after a leading alphanumeric
    // saves and loads.
    let saved = create_snapshot_from_binary();
    let tmp = tempfile::tempdir().unwrap();
    let layout = tmp.path().join("layout");
    let mixed_tag = "v1.2.3-rc.1_build";
    saved.to_oci(&layout, mixed_tag).unwrap();
    let _ = Snapshot::from_oci(&layout, mixed_tag).unwrap();
}

#[test]
fn other_descriptor_annotations_do_not_interfere() {
    // Extra annotations next to the standard ref.name one (cosign
    // signatures, build pipeline metadata, etc.) must not stop the
    // descriptor from resolving by tag.
    let (_guard, layout) = save_for_mutation();
    rewrite_index(&layout, |index| {
        let annotations = index["manifests"][0]["annotations"]
            .as_object_mut()
            .unwrap();
        annotations.insert(
            "dev.sigstore.cosign/signature".to_string(),
            Value::from("MEUCIQDfake"),
        );
        annotations.insert("io.example.build.id".to_string(), Value::from("12345"));
    });
    let _ = Snapshot::from_oci(&layout, "latest").unwrap();
}

// =============================================================================
// Bad sha256 digest format on the inner descriptors (config and snapshot
// layer).
The index-side equivalent is `bad_digest_format_rejected`. +// ============================================================================= + +#[test] +fn bad_config_descriptor_digest_format_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["config"]["digest"] = Value::from("md5:deadbeef"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{err}"); + assert!( + msg.contains("digest"), + "expected digest-format error, got: {msg}" + ); +} + +#[test] +fn bad_snapshot_layer_descriptor_digest_format_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["layers"][0]["digest"] = Value::from("sha256:tooshort"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{err}"); + assert!( + msg.contains("digest"), + "expected digest-format error, got: {msg}" + ); +} + +// ============================================================================= +// Missing inner blobs. +// ============================================================================= + +#[test] +fn missing_config_blob_rejected() { + let (_dir, path) = save_for_mutation(); + let cfg_path = find_config_blob(&path); + std::fs::remove_file(&cfg_path).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{err}"); + assert!( + msg.contains("open") || msg.contains("No such") || msg.contains("not found"), + "expected missing-config-blob error, got: {msg}" + ); +} + +// ============================================================================= +// Size-bound enforcement. +// ============================================================================= + +#[test] +fn manifest_blob_too_large_rejected() { + // The manifest reader bounds to 1 MiB. Replace the manifest + // with junk longer than that and confirm the bound trips + // before any parsing. 
+ let (_dir, path) = save_for_mutation(); + let mp = manifest_path(&path); + let huge = vec![b'a'; (1024 * 1024 + 16) as usize]; + std::fs::write(&mp, &huge).unwrap(); + // Update descriptor size to match so we hit the bound check, + // not the size mismatch check. + rewrite_index(&path, |idx| { + idx["manifests"][0]["size"] = Value::from(huge.len() as u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "exceeds maximum allowed"); +} + +#[test] +fn config_blob_too_large_rejected() { + let (_dir, path) = save_for_mutation(); + let cfg_path = find_config_blob(&path); + let huge = vec![b'a'; (1024 * 1024 + 16) as usize]; + std::fs::write(&cfg_path, &huge).unwrap(); + rewrite_manifest(&path, |m| { + m["config"]["size"] = Value::from(huge.len() as u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "exceeds maximum allowed"); +} + +#[test] +fn memory_size_too_large_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + // 16 GiB exceeds MAX_MEMORY_SIZE. + cfg["memory_size"] = Value::from(16u64 * 1024 * 1024 * 1024); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "memory_size"); +} + +#[test] +fn snapshot_descriptor_size_disagrees_with_file_rejected() { + // Snapshot descriptor claims a different size than the actual + // blob file. The loader must reject before mmap-ing. 
+ let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + let sz = m["layers"][0]["size"].as_u64().unwrap(); + m["layers"][0]["size"] = Value::from(sz + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + let msg = format!("{err}"); + assert!( + msg.contains("snapshot blob size"), + "expected snapshot-blob descriptor disagreement error, got: {msg}" + ); +} + +// ============================================================================= +// `from_oci_unchecked` shares the same non-digest validators with +// `from_oci`. The key safety claim of the unchecked path is that it +// is faster, NOT that it is more permissive about anything other +// than digest checks. Pin that contract down here. +// ============================================================================= + +#[test] +fn from_oci_unchecked_validates_tag_format() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "bad/tag")); + assert_err_contains(err, "tag"); +} + +#[test] +fn from_oci_unchecked_rejects_unknown_tag() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "nosuch")); + assert_err_contains(err, "no manifest tagged"); +} + +#[test] +fn from_oci_unchecked_rejects_path_not_directory() { + let dir = tempfile::tempdir().unwrap(); + let file_path = dir.path().join("not-a-dir"); + std::fs::write(&file_path, b"hi").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&file_path, "latest")); + assert_err_contains(err, "not a directory"); +} + +#[test] +fn from_oci_unchecked_rejects_missing_oci_layout_marker() { + let snap = create_snapshot_from_binary(); + 
let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + std::fs::remove_file(path.join("oci-layout")).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "oci-layout"); +} + +// ============================================================================= +// Round-trip data fidelity. +// +// The serde shape tests already prove individual fields parse, but +// they don't prove that all the values that came out of the producer +// reach the loaded snapshot. These tests pin down full round-trip +// fidelity for fields that are not exercised by the +// "load-then-call-the-guest" round-trip tests above. +// ============================================================================= + +#[test] +fn round_trip_preserves_stack_top_gva() { + let mut sbox = create_test_sandbox(); + let snap = sbox.snapshot().unwrap(); + let original = snap.stack_top_gva(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + assert_eq!(loaded.stack_top_gva(), original); +} + +#[test] +fn round_trip_preserves_non_default_scratch_size() { + use crate::sandbox::SandboxConfiguration; + let mut cfg = SandboxConfiguration::default(); + let custom_scratch: usize = 256 * 1024; + cfg.set_scratch_size(custom_scratch); + let snap = Snapshot::from_env( + GuestBinary::FilePath(simple_guest_as_string().unwrap()), + cfg, + ) + .unwrap(); + let original = snap.layout().get_scratch_size(); + assert_eq!(original, custom_scratch); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + assert_eq!(loaded.layout().get_scratch_size(), custom_scratch); +} + +#[test] +fn pre_init_snapshot_writes_initialise_entrypoint_kind() 
{ + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let cfg: Value = + serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap(); + assert_eq!(cfg["entrypoint"]["kind"].as_str().unwrap(), "initialise"); + assert!( + cfg["entrypoint"].get("sregs").is_none(), + "Initialise snapshot must not carry sregs in the config" + ); +} + +#[test] +fn already_initialised_snapshot_writes_call_entrypoint_kind() { + let mut sbox = create_test_sandbox(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + let cfg: Value = + serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap(); + assert_eq!(cfg["entrypoint"]["kind"].as_str().unwrap(), "call"); + assert!( + cfg["entrypoint"]["sregs"].is_object(), + "Call snapshot must carry sregs in the config" + ); +} + +#[test] +fn round_trip_preserves_host_function_signatures() { + // Save a snapshot with a custom host function signature, load + // it, and confirm the recorded signatures survive. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + + let cfg: Value = + serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap(); + let funcs = cfg["host_functions"].as_array().unwrap(); + let add = funcs + .iter() + .find(|f| f["function_name"].as_str().unwrap() == "Add") + .expect("Add must be recorded"); + assert_eq!( + add["parameter_types"].as_array().unwrap().len(), + 2, + "Add signature must record two parameters" + ); + // Loading and using the snapshot must accept the same signature. 
+ let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let _ = MultiUseSandbox::from_snapshot(Arc::new(loaded), host_funcs_with_matching_add(), None) + .unwrap(); +} + +#[test] +fn snapshot_with_no_host_functions_round_trips() { + // A snapshot with `host_functions: []` must round-trip without + // confusing the loader (which has special handling for the + // empty-vs-None case). + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "latest").unwrap(); + + let cfg: Value = + serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap(); + assert!( + cfg["host_functions"].as_array().unwrap().is_empty(), + "expected empty host_functions array for pre-init snapshot" + ); + + // The default HostFunctions set is sufficient because the + // snapshot requires nothing. + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let _ = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); +} + +// ============================================================================= +// Snapshot lineage and restore semantics. +// +// Hyperlight's snapshot model is NOT a tree. Each `MultiUseSandbox` +// has a process-local `sandbox_id`; `snapshot()` tags the snapshot +// with that id; `from_snapshot(snap)` adopts `snap.sandbox_id()` so +// the new sandbox can restore back to it; and `restore(snap)` +// requires `self.id == snap.sandbox_id()`. So sandboxes built from +// clones of the same `Arc` form a flat id-equivalence +// class within which restore is freely interchangeable. +// +// These tests pin down all the combinations of build-from-snapshot, +// take-more-snapshots, restore-out-of-order, and reject-across-class +// that follow from that model. 
+// =============================================================================
+
+#[test]
+fn linear_chain_restore_in_order() {
+    // Take three snapshots at different states in one sandbox, then
+    // restore to each in chronological order. After each restore,
+    // the static counter must read the value it had when that
+    // snapshot was taken.
+    let mut sbox = create_test_sandbox();
+    let s0 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 10i32).unwrap();
+    let s10 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 20i32).unwrap();
+    let s30 = sbox.snapshot().unwrap();
+
+    sbox.restore(s0.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+    sbox.restore(s10.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 10);
+    sbox.restore(s30.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 30);
+}
+
+#[test]
+fn linear_chain_restore_out_of_order() {
+    // Restore through the same chain but in a non-monotonic order
+    // (forward, back, forward, back). Snapshots within one
+    // id-equivalence class are NOT ordered by when they were
+    // taken: any can be restored to from any other.
+    let mut sbox = create_test_sandbox();
+    let s0 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 7i32).unwrap();
+    let s7 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 100i32).unwrap();
+    let s107 = sbox.snapshot().unwrap();
+
+    let order = [&s107, &s0, &s7, &s107, &s0];
+    let expected = [107, 0, 7, 107, 0];
+    for (snap, want) in order.iter().zip(expected.iter()) {
+        sbox.restore((*snap).clone()).unwrap();
+        assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), *want);
+    }
+}
+
+#[test]
+fn restore_then_call_then_snapshot_then_restore() {
+    // Restore changes the live state, but it must NOT invalidate
+    // the snapshot that was just used. After restoring to S1, the
+    // sandbox can still take a new snapshot and restore back to
+    // either S1 or the new one.
+    let mut sbox = create_test_sandbox();
+    let s_init = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 4i32).unwrap();
+
+    // Restore back to init.
+    sbox.restore(s_init.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+
+    // Mutate again, snapshot, mutate further.
+    sbox.call::<i32>("AddToStatic", 9i32).unwrap();
+    let s_post_restore = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 100i32).unwrap();
+
+    // Restore to either reachable snapshot.
+    sbox.restore(s_post_restore.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 9);
+    sbox.restore(s_init.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+}
+
+#[test]
+fn restore_idempotent() {
+    // Restoring to the same snapshot twice in a row must produce
+    // the same observable state both times.
+    let mut sbox = create_test_sandbox();
+    sbox.call::<i32>("AddToStatic", 11i32).unwrap();
+    let s = sbox.snapshot().unwrap();
+
+    sbox.call::<i32>("AddToStatic", 22i32).unwrap();
+    sbox.restore(s.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 11);
+
+    // No mutation between restores.
+    sbox.restore(s.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 11);
+
+    // Mutation after the second restore must take effect.
+    sbox.call::<i32>("AddToStatic", 1i32).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 12);
+}
+
+#[test]
+fn from_snapshot_then_snapshot_then_restore_to_both() {
+    // Build sandbox B from snapshot S0 (B inherits S0's id).
+    // B takes its own snapshot S1 (also tagged with S0's id). Both
+    // S0 and S1 must be reachable from B via `restore`.
+    //
+    // Note: only snapshots taken from a RUNNING sandbox (with
+    // sregs) are valid restore targets. We therefore start from a
+    // snapshot of a running sandbox, not a pre-init snapshot.
+    let mut seed = create_test_sandbox();
+    let s0 = seed.snapshot().unwrap();
+
+    let mut b = MultiUseSandbox::from_snapshot(s0.clone(), HostFunctions::default(), None).unwrap();
+    b.call::<i32>("AddToStatic", 5i32).unwrap();
+    let s1 = b.snapshot().unwrap();
+    b.call::<i32>("AddToStatic", 10i32).unwrap();
+
+    // Restore back to S1.
+    b.restore(s1.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 5);
+
+    // Restore back further to the constructor snapshot S0.
+    b.restore(s0.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 0);
+}
+
+#[test]
+fn arc_clone_lineage_two_sandboxes_each_restores_to_either() {
+    // Two sandboxes built from the SAME Arc<Snapshot> share the
+    // sandbox_id. Each takes its own snapshot. Each must be
+    // restorable to (a) its own derived snapshot, (b) the shared
+    // root snapshot, and (c) the OTHER sandbox's derived snapshot
+    // (because all three snapshots share one id).
+    //
+    // Note: the shared root must be a running-sandbox snapshot so
+    // that restore() can use its sregs.
+    let mut seed = create_test_sandbox();
+    let snap_root = seed.snapshot().unwrap();
+
+    let mut a =
+        MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap();
+    let mut b =
+        MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap();
+
+    a.call::<i32>("AddToStatic", 3i32).unwrap();
+    let snap_a = a.snapshot().unwrap();
+
+    b.call::<i32>("AddToStatic", 70i32).unwrap();
+    let snap_b = b.snapshot().unwrap();
+
+    // a: own snap then root then b's snap.
+    a.restore(snap_a.clone()).unwrap();
+    assert_eq!(a.call::<i32>("GetStatic", ()).unwrap(), 3);
+    a.restore(snap_root.clone()).unwrap();
+    assert_eq!(a.call::<i32>("GetStatic", ()).unwrap(), 0);
+    a.restore(snap_b.clone()).unwrap();
+    assert_eq!(a.call::<i32>("GetStatic", ()).unwrap(), 70);
+
+    // b: cross-restore the other way.
+    b.restore(snap_a.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 3);
+    b.restore(snap_root.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 0);
+    b.restore(snap_b.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 70);
+}
+
+#[test]
+fn separate_from_snapshot_calls_share_id_class_through_lineage() {
+    // Build sandbox A from a running-sandbox snapshot snap_root.
+    // A takes snap_a. Then build sandbox B from snap_a (a different
+    // Arc<Snapshot>, but B adopts snap_a.sandbox_id == snap_root.sandbox_id).
+    // B must be restorable to BOTH snap_a and snap_root because
+    // they all share one id.
+    let mut seed = create_test_sandbox();
+    let snap_root = seed.snapshot().unwrap();
+
+    let mut a =
+        MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap();
+    a.call::<i32>("AddToStatic", 5i32).unwrap();
+    let snap_a = a.snapshot().unwrap();
+
+    let mut b =
+        MultiUseSandbox::from_snapshot(snap_a.clone(), HostFunctions::default(), None).unwrap();
+    b.restore(snap_a.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 5);
+    b.restore(snap_root.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 0);
+}
+
+#[test]
+fn separate_oci_loads_are_mutually_restore_compatible() {
+    // Each `from_oci` call rehydrates a structurally identical
+    // snapshot. Compatibility is determined by memory layout and
+    // host-function set, so a sandbox built from one load accepts
+    // a snapshot from any other load of the same image.
+    let mut seed = create_test_sandbox();
+    let snap = seed.snapshot().unwrap();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "v1").unwrap();
+
+    let s_x = Arc::new(Snapshot::from_oci(&path, "v1").unwrap());
+    let s_y = Arc::new(Snapshot::from_oci(&path, "v1").unwrap());
+
+    let mut sbox_x =
+        MultiUseSandbox::from_snapshot(s_x.clone(), HostFunctions::default(), None).unwrap();
+    sbox_x.restore(s_y.clone()).unwrap();
+    assert_eq!(sbox_x.call::<i32>("GetStatic", ()).unwrap(), 0);
+
+    sbox_x.restore(s_x.clone()).unwrap();
+    assert_eq!(sbox_x.call::<i32>("GetStatic", ()).unwrap(), 0);
+}
+
+#[test]
+fn oci_loaded_snapshot_supports_full_lifecycle() {
+    // Full round-trip: save (from a running sandbox so the loaded
+    // snapshot is a valid restore target), load, build sandbox,
+    // mutate, snapshot, mutate, restore, mutate, snapshot, restore.
+    // Both pre- and post-load snapshots in the loaded id class must
+    // remain restore-compatible across an arbitrary number of
+    // cycles.
+    let mut seed = create_test_sandbox();
+    let snap = seed.snapshot().unwrap();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "v1").unwrap();
+
+    let loaded = Arc::new(Snapshot::from_oci(&path, "v1").unwrap());
+    let mut sbox =
+        MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap();
+
+    sbox.call::<i32>("AddToStatic", 1i32).unwrap();
+    let s1 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 2i32).unwrap();
+    let s3 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 4i32).unwrap();
+
+    sbox.restore(s1.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 1);
+    sbox.restore(s3.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 3);
+    sbox.restore(loaded.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+
+    // Take a fresh snapshot post-restore. It is in the same id
+    // class and remains interchangeable with the others.
+    let s_post = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 50i32).unwrap();
+    sbox.restore(s_post.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+    sbox.restore(s3.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 3);
+}
+
+#[test]
+fn restore_invariant_under_intermediate_mutations() {
+    // Restoring to S followed by an arbitrary number of
+    // mutate-then-restore cycles must always produce the same
+    // post-restore observable state. This is the core "snapshot
+    // and restore really mean what they say" property.
+    let mut sbox = create_test_sandbox();
+    sbox.call::<i32>("AddToStatic", 13i32).unwrap();
+    let s = sbox.snapshot().unwrap();
+
+    let mutations = [3, 5, 7, 11, 13, 17, 19];
+    for m in mutations {
+        sbox.call::<i32>("AddToStatic", m).unwrap();
+        sbox.restore(s.clone()).unwrap();
+        assert_eq!(
+            sbox.call::<i32>("GetStatic", ()).unwrap(),
+            13,
+            "restore must reset to the snapshotted value regardless of intermediate mutation {m}"
+        );
+    }
+}
+
+#[test]
+fn many_arc_clones_one_snapshot_share_id() {
+    // Cloning Arc<Snapshot> N times yields N references with
+    // identical sandbox_id. Each sandbox built from a clone shares
+    // the id and is mutually restore-compatible. Verifies that the
+    // id-equivalence-class semantics hold for arbitrary fan-out.
+    //
+    // The shared root must be a running-sandbox snapshot so the
+    // sandboxes can restore to it.
+    let mut seed = create_test_sandbox();
+    let snap = seed.snapshot().unwrap();
+    let mut sandboxes: Vec<MultiUseSandbox> = (0..4)
+        .map(|_| {
+            MultiUseSandbox::from_snapshot(snap.clone(), HostFunctions::default(), None).unwrap()
+        })
+        .collect();
+
+    // Each sandbox takes its own derived snapshot tagged with a
+    // unique value.
+    let mut snaps: Vec<Arc<Snapshot>> = Vec::new();
+    for (i, s) in sandboxes.iter_mut().enumerate() {
+        s.call::<i32>("AddToStatic", (i as i32 + 1) * 10).unwrap();
+        snaps.push(s.snapshot().unwrap());
+    }
+
+    // Every sandbox can restore to every snapshot in the class.
+    for (i, sbox) in sandboxes.iter_mut().enumerate() {
+        for (j, target) in snaps.iter().enumerate() {
+            sbox.restore(target.clone()).unwrap();
+            let want = (j as i32 + 1) * 10;
+            assert_eq!(
+                sbox.call::<i32>("GetStatic", ()).unwrap(),
+                want,
+                "sandbox {i} restored to snapshot {j} should observe value {want}"
+            );
+        }
+        // And to the root snapshot.
+        sbox.restore(snap.clone()).unwrap();
+        assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+    }
+}
+
+// =============================================================================
+// `from_snapshot` config plumbing.
+// =============================================================================
+//
+// `from_snapshot` accepts a caller-supplied `SandboxConfiguration`.
+// Layout fields must be silently overridden by the snapshot (the
+// on-disk memory blob already encodes those sizes). Runtime fields
+// must take effect.
+
+/// Layout fields supplied via `SandboxConfiguration` must be silently
+/// overridden. The snapshot's own layout is authoritative.
+#[test]
+fn from_snapshot_silently_ignores_layout_overrides() {
+    use crate::sandbox::SandboxConfiguration;
+
+    let mut sbox = create_test_sandbox();
+    let snapshot = sbox.snapshot().unwrap();
+    let original_input = snapshot.layout().input_data_size;
+    let original_output = snapshot.layout().output_data_size;
+    let original_heap = snapshot.layout().heap_size;
+    let original_scratch = snapshot.layout().get_scratch_size();
+
+    let mut config = SandboxConfiguration::default();
+    config.set_input_data_size(original_input * 2);
+    config.set_output_data_size(original_output * 2);
+    config.set_heap_size((original_heap as u64) * 2);
+    config.set_scratch_size(original_scratch * 2);
+
+    let mut sbox2 =
+        MultiUseSandbox::from_snapshot(snapshot.clone(), HostFunctions::default(), Some(config))
+            .unwrap();
+
+    sbox2.call::<i32>("GetStatic", ()).unwrap();
+
+    let new_snap = sbox2.snapshot().unwrap();
+    assert_eq!(new_snap.layout().input_data_size, original_input);
+    assert_eq!(new_snap.layout().output_data_size, original_output);
+    assert_eq!(new_snap.layout().heap_size, original_heap);
+    assert_eq!(new_snap.layout().get_scratch_size(), original_scratch);
+}
+
+/// `from_snapshot` honors `guest_core_dump=true` so that
+/// `generate_crashdump_to_dir` writes a file.
+#[test]
+#[cfg(crashdump)]
+fn from_snapshot_honors_guest_core_dump_enabled() {
+    use crate::sandbox::SandboxConfiguration;
+
+    let mut sbox = create_test_sandbox();
+    let snapshot = sbox.snapshot().unwrap();
+
+    let mut config = SandboxConfiguration::default();
+    config.set_guest_core_dump(true);
+
+    let mut sbox2 =
+        MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap();
+
+    let dir = tempfile::tempdir().unwrap();
+    sbox2
+        .generate_crashdump_to_dir(dir.path().to_str().unwrap())
+        .unwrap();
+
+    let entries: Vec<_> = std::fs::read_dir(dir.path())
+        .unwrap()
+        .filter_map(Result::ok)
+        .collect();
+    assert!(
+        !entries.is_empty(),
+        "expected core dump file when guest_core_dump=true"
+    );
+}
+
+/// `from_snapshot` honors `guest_core_dump=false` so that
+/// `generate_crashdump_to_dir` produces no file.
+#[test]
+#[cfg(crashdump)]
+fn from_snapshot_honors_guest_core_dump_disabled() {
+    use crate::sandbox::SandboxConfiguration;
+
+    let mut sbox = create_test_sandbox();
+    let snapshot = sbox.snapshot().unwrap();
+
+    let mut config = SandboxConfiguration::default();
+    config.set_guest_core_dump(false);
+
+    let mut sbox2 =
+        MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap();
+
+    let dir = tempfile::tempdir().unwrap();
+    sbox2
+        .generate_crashdump_to_dir(dir.path().to_str().unwrap())
+        .unwrap();
+
+    let entries: Vec<_> = std::fs::read_dir(dir.path())
+        .unwrap()
+        .filter_map(Result::ok)
+        .collect();
+    assert!(
+        entries.is_empty(),
+        "expected no core dump file when guest_core_dump=false, found {:?}",
+        entries.iter().map(|e| e.path()).collect::<Vec<_>>()
+    );
+}
+
+/// Loading from OCI must reset `snapshot_generation` to 0, regardless
+/// of what generation the source sandbox was at when it saved.
+#[test] +fn snapshot_generation_resets_on_oci_load() { + let dir = tempfile::tempdir().unwrap(); + let oci_dir = dir.path().join("layout"); + + let mut sbox = create_test_sandbox(); + // Bump generation by taking + restoring a snapshot a few times. + for _ in 0..3 { + let s = sbox.snapshot().unwrap(); + sbox.restore(s).unwrap(); + } + let live = sbox.snapshot().unwrap(); + assert!( + live.snapshot_generation() > 0, + "expected nonzero generation after restore cycles" + ); + + live.to_oci(&oci_dir, "gen-reset").unwrap(); + let loaded = Snapshot::from_oci(&oci_dir, "gen-reset").unwrap(); + assert_eq!( + loaded.snapshot_generation(), + 0, + "snapshot_generation must reset to 0 on OCI load" + ); +} + +/// Non-default `init_data_permissions` survive an OCI round-trip +/// byte-for-byte. The default code path uses `READ`, so this pins +/// `READ | WRITE` instead. A regression in the permission +/// serialisation would silently downgrade or upgrade access to the +/// init_data region. +#[test] +fn round_trip_preserves_non_default_init_data_permissions() { + use crate::mem::memory_region::MemoryRegionFlags; + use crate::sandbox::SandboxConfiguration; + use crate::sandbox::uninitialized::{GuestBlob, GuestEnvironment}; + + let path = simple_guest_as_string().unwrap(); + let data: &[u8] = b"perm-pinned-init-data"; + let env = GuestEnvironment { + guest_binary: GuestBinary::FilePath(path), + init_data: Some(GuestBlob { + data, + permissions: MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, + }), + }; + let snap = Snapshot::from_env(env, SandboxConfiguration::default()).unwrap(); + let expected = snap.layout().init_data_permissions; + assert_eq!( + expected, + Some(MemoryRegionFlags::READ | MemoryRegionFlags::WRITE), + "fixture must produce non-default init_data_permissions", + ); + + let dir = tempfile::tempdir().unwrap(); + let oci_dir = dir.path().join("layout"); + snap.to_oci(&oci_dir, "perms").unwrap(); + let loaded = Snapshot::from_oci(&oci_dir, "perms").unwrap(); + 
assert_eq!(loaded.layout().init_data_permissions, expected); +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index 91fad0d4c..f78841ccc 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -14,6 +14,10 @@ See the License for the specific language governing permissions and limitations under the License. */ +mod file; +mod file_tests; +mod tripwires; + use std::collections::{BTreeMap, HashMap}; use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; @@ -271,7 +275,7 @@ fn map_specials(pt_buf: &GuestPageTableBuffer, scratch_size: usize) { impl Snapshot { /// Create a new snapshot from the guest binary identified by `env`. With the configuration /// specified in `cfg`. - pub(crate) fn from_env<'a, 'b>( + pub fn from_env<'a, 'b>( env: impl Into>, cfg: SandboxConfiguration, ) -> Result { diff --git a/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs b/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs new file mode 100644 index 000000000..2385c5fd2 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs @@ -0,0 +1,102 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Compile-time tripwires for the snapshot ABI. +//! +//! Each assertion below pins one piece of the on-disk or in-memory +//! 
contract that snapshots depend on: the manifest media types, the +//! OCI Image Layout version, the `HyperlightPEB` field offsets, and +//! the `OutBAction` port numbers. A change to any of these means +//! snapshots produced by older builds can no longer be loaded +//! correctly by this build. +//! +//! When one of these assertions fires, the change is breaking the +//! snapshot ABI. The fix is one of: +//! +//! * Avoid the break entirely. Reshape the change so the on-disk +//! contract does not move. +//! * Make the change backwards compatible (add a versioned variant, +//! add a compatibility path in the loader) and leave the pinned +//! values here alone. +//! * Accept the break: bump [`super::file::SNAPSHOT_ABI_VERSION`] +//! together with `EXPECTED_ABI_VERSION` below, and update any +//! other `EXPECTED_*` constants here to match whatever the source +//! values now are. Snapshots produced by older builds will be +//! rejected at load time by the version check, so they must be +//! regenerated. Call this out in the release notes. 
+ +use super::file::{ + MT_CONFIG_CURRENT, MT_SNAPSHOT_CURRENT, OCI_LAYOUT_VERSION, SNAPSHOT_ABI_VERSION, +}; + +const EXPECTED_ABI_VERSION: u32 = 1; +const EXPECTED_MT_CONFIG: &str = "application/vnd.hyperlight.snapshot.config.v1+json"; +const EXPECTED_MT_SNAPSHOT: &str = "application/vnd.hyperlight.snapshot.memory.v1"; +const EXPECTED_OCI_LAYOUT_VERSION: &str = "1.0.0"; + +const _: () = { + assert!(SNAPSHOT_ABI_VERSION == EXPECTED_ABI_VERSION); + assert!(str_eq(MT_CONFIG_CURRENT, EXPECTED_MT_CONFIG)); + assert!(str_eq(MT_SNAPSHOT_CURRENT, EXPECTED_MT_SNAPSHOT)); + assert!(str_eq(OCI_LAYOUT_VERSION, EXPECTED_OCI_LAYOUT_VERSION)); +}; + +#[cfg(not(feature = "nanvix-unstable"))] +const _: () = { + use hyperlight_common::mem::{GuestMemoryRegion, HyperlightPEB}; + assert!(std::mem::size_of::() == 16); + assert!(std::mem::size_of::() == 4 * 16); + assert!(std::mem::offset_of!(HyperlightPEB, input_stack) == 0); + assert!(std::mem::offset_of!(HyperlightPEB, output_stack) == 16); + assert!(std::mem::offset_of!(HyperlightPEB, init_data) == 32); + assert!(std::mem::offset_of!(HyperlightPEB, guest_heap) == 48); +}; + +#[cfg(feature = "nanvix-unstable")] +const _: () = { + use hyperlight_common::mem::{GuestMemoryRegion, HyperlightPEB}; + assert!(std::mem::size_of::() == 16); + assert!(std::mem::size_of::() == 5 * 16); + assert!(std::mem::offset_of!(HyperlightPEB, input_stack) == 0); + assert!(std::mem::offset_of!(HyperlightPEB, output_stack) == 16); + assert!(std::mem::offset_of!(HyperlightPEB, init_data) == 32); + assert!(std::mem::offset_of!(HyperlightPEB, guest_heap) == 48); + assert!(std::mem::offset_of!(HyperlightPEB, file_mappings) == 64); +}; + +const _: () = { + use hyperlight_common::outb::OutBAction; + assert!(OutBAction::Log as u16 == 99); + assert!(OutBAction::CallFunction as u16 == 101); + assert!(OutBAction::Abort as u16 == 102); + assert!(OutBAction::DebugPrint as u16 == 103); +}; + +const fn str_eq(a: &str, b: &str) -> bool { + let a = a.as_bytes(); + let b = 
b.as_bytes(); + if a.len() != b.len() { + return false; + } + let mut i = 0; + while i < a.len() { + if a[i] != b[i] { + return false; + } + i += 1; + } + true +} diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 6b5a7f8e3..b3b6ce4fb 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -535,7 +535,7 @@ fn guest_malloc_abort() { }); // allocate a vector (on heap) that is bigger than the heap - let heap_size = 0x4000; + let heap_size = 0x6000; let size_to_allocate = 0x10000; assert!( size_to_allocate > heap_size, @@ -616,7 +616,7 @@ fn corrupt_output_back_pointer_rejected() { #[test] fn guest_panic_no_alloc() { - let heap_size = 0x4000; + let heap_size = 0x6000; let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(heap_size); diff --git a/src/hyperlight_host/tests/snapshot_goldens/checks.rs b/src/hyperlight_host/tests/snapshot_goldens/checks.rs new file mode 100644 index 000000000..8a9559f2a --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/checks.rs @@ -0,0 +1,346 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Functional checks against goldens loaded from the on-disk cache. +//! +//! Each check runs against a fresh `MultiUseSandbox` built from +//! the golden for `Check::kind`, so checks are independent and +//! one failure does not poison the next. +//! +//! 
Adding coverage: write a `fn(&mut MultiUseSandbox) -> Result<(), +//! String>` and add one row to `CHECKS`. + +use std::sync::Arc; + +use hyperlight_host::sandbox::snapshot::Snapshot; +use hyperlight_host::{HostFunctions, MultiUseSandbox}; + +use crate::fixtures::{CALL_COUNTER_BUMP, HEAP_PATTERN_LEN, INIT_DATA, register_host_echo_fns}; +use crate::platform::Kind; + +pub struct Check { + pub name: &'static str, + pub kind: Kind, + pub run: fn(&mut MultiUseSandbox) -> Result<(), String>, +} + +pub const CHECKS: &[Check] = &[ + Check { + name: "init/basic_call", + kind: Kind::Init, + run: init_basic_call, + }, + Check { + name: "init/data_round_trip", + kind: Kind::Init, + run: init_data_round_trip, + }, + Check { + name: "init/custom_layout_works", + kind: Kind::Init, + run: init_custom_layout_works, + }, + Check { + name: "call/captured_bss", + kind: Kind::Call, + run: call_captured_bss, + }, + Check { + name: "call/captured_heap_pattern", + kind: Kind::Call, + run: call_captured_heap_pattern, + }, + Check { + name: "call/guest_types_round_trip", + kind: Kind::Call, + run: call_guest_types_round_trip, + }, + Check { + name: "call/host_round_trips", + kind: Kind::Call, + run: call_host_round_trips, + }, + Check { + name: "call/chained_snapshot", + kind: Kind::Call, + run: call_chained_snapshot, + }, +]; + +// ----------------------------------------------------------------- +// init +// ----------------------------------------------------------------- + +/// Loaded init golden answers a basic call and observes a clean +/// BSS. Covers the header layout, layout arithmetic, PEB contents, +/// the dispatch port, the initialise entry convention, and BSS init. 
+fn init_basic_call(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let value: i32 = sbox + .call("GetStatic", ()) + .map_err(|e| format!("GetStatic: {e}"))?; + if value != 0 { + return Err(format!("fresh init must observe BSS == 0, got {value}")); + } + Ok(()) +} + +/// `INIT_DATA` survives the snapshot round-trip with permissions +/// intact. The guest's `ReadFromUserMemory` returns the captured +/// bytes; a mismatch indicates silent corruption of the init_data +/// region. +fn init_data_round_trip(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let bytes: Vec = sbox + .call( + "ReadFromUserMemory", + (INIT_DATA.len() as u64, INIT_DATA.to_vec()), + ) + .map_err(|e| format!("ReadFromUserMemory: {e}"))?; + if bytes != INIT_DATA { + return Err(format!( + "captured init_data did not round-trip byte-for-byte (len={})", + bytes.len(), + )); + } + Ok(()) +} + +/// Any silent shift in `SandboxMemoryLayout::new` arithmetic with +/// the non-default sizes from `golden_config` would land the PEB or +/// scratch buffers at the wrong addresses; an `Echo` would then +/// fail. +fn init_custom_layout_works(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let got: String = sbox + .call("Echo", "custom-layout".to_string()) + .map_err(|e| format!("Echo: {e}"))?; + if got != "custom-layout" { + return Err(format!("Echo returned {got:?}")); + } + Ok(()) +} + +// ----------------------------------------------------------------- +// call +// ----------------------------------------------------------------- + +/// Captured BSS restores exactly: `COUNTER == CALL_COUNTER_BUMP`. +/// Covers the dispatch convention, sregs apply, page-table +/// relocation, captured stack/BSS. 
+fn call_captured_bss(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let value: i32 = sbox + .call("GetStatic", ()) + .map_err(|e| format!("GetStatic: {e}"))?; + if value != CALL_COUNTER_BUMP { + return Err(format!( + "captured COUNTER expected {CALL_COUNTER_BUMP}, got {value}", + )); + } + Ok(()) +} + +/// Captured heap state restores exactly: the pinned `Vec` +/// pattern produced by `AllocAndWritePattern` survives across +/// save/load. +fn call_captured_heap_pattern(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let got: Vec = sbox + .call("ReadPattern", ()) + .map_err(|e| format!("ReadPattern: {e}"))?; + let expected: Vec = (0..HEAP_PATTERN_LEN as usize) + .map(|i| (i & 0xff) as u8) + .collect(); + if got != expected { + return Err(format!( + "captured heap pattern mismatch (got len {} expected len {})", + got.len(), + expected.len(), + )); + } + Ok(()) +} + +/// Guest-call wire format for every primitive parameter and return +/// type. Each loop asserts an `EchoT` round-trips. Float NaN goes +/// through `is_nan` since `NaN != NaN`. +fn call_guest_types_round_trip(sbox: &mut MultiUseSandbox) -> Result<(), String> { + macro_rules! 
echo { + ($name:expr, $ty:ty, $values:expr) => {{ + for &v in $values.iter() { + let got: $ty = sbox + .call($name, v) + .map_err(|e| format!("{}({:?}): {e}", $name, v))?; + if got != v { + return Err(format!("{}({:?}) returned {:?}", $name, v, got)); + } + } + }}; + } + echo!("EchoI32", i32, [i32::MIN, -1, 0, 1, i32::MAX]); + echo!("EchoU32", u32, [0u32, 1, u32::MAX]); + echo!("EchoI64", i64, [i64::MIN, -1, 0, 1, i64::MAX]); + echo!("EchoU64", u64, [0u64, 1, u64::MAX]); + echo!( + "EchoFloat", + f32, + [ + 0.0f32, + -1.5, + 1.5, + f32::MIN, + f32::MAX, + f32::INFINITY, + f32::NEG_INFINITY, + ] + ); + let got: f32 = sbox + .call("EchoFloat", f32::NAN) + .map_err(|e| format!("EchoFloat(NaN): {e}"))?; + if !got.is_nan() { + return Err(format!("EchoFloat(NaN) returned {got}")); + } + echo!( + "EchoDouble", + f64, + [ + 0.0f64, + -1.5, + 1.5, + f64::MIN, + f64::MAX, + f64::INFINITY, + f64::NEG_INFINITY, + ] + ); + let got: f64 = sbox + .call("EchoDouble", f64::NAN) + .map_err(|e| format!("EchoDouble(NaN): {e}"))?; + if !got.is_nan() { + return Err(format!("EchoDouble(NaN) returned {got}")); + } + echo!("EchoBool", bool, [false, true]); + + for v in [String::new(), "hello".to_string(), "héllo 🌍".to_string()] { + let got: String = sbox + .call("Echo", v.clone()) + .map_err(|e| format!("Echo({v:?}): {e}"))?; + if got != v { + return Err(format!("Echo({v:?}) returned {got:?}")); + } + } + for v in [ + Vec::::new(), + vec![0u8, 1, 2, 3, 0xff], + (0..256u32).map(|i| (i & 0xff) as u8).collect::>(), + ] { + let got: Vec = sbox + .call("GetSizePrefixedBuffer", v.clone()) + .map_err(|e| format!("GetSizePrefixedBuffer(len={}): {e}", v.len()))?; + if got != v { + return Err(format!( + "GetSizePrefixedBuffer(len={}) did not round-trip", + v.len(), + )); + } + } + let _: () = sbox.call("NoOp", ()).map_err(|e| format!("NoOp: {e}"))?; + let mixed: i32 = sbox + .call( + "PrintElevenArgs", + ( + "a".to_string(), + 1i32, + 2i64, + "b".to_string(), + "c".to_string(), + true, + false, + 
3u32, + 4u64, + 5i32, + 6.5f32, + ), + ) + .map_err(|e| format!("PrintElevenArgs: {e}"))?; + if mixed < 0 { + return Err(format!("PrintElevenArgs returned {mixed}")); + } + Ok(()) +} + +/// Host-call wire format for every primitive parameter and return +/// type. Each `RoundTripHostT` invokes the matching `HostEchoT` on +/// the registered host-fn set. +fn call_host_round_trips(sbox: &mut MultiUseSandbox) -> Result<(), String> { + macro_rules! rt { + ($name:expr, $ty:ty, $value:expr) => {{ + let v: $ty = $value; + let got: $ty = sbox + .call($name, v.clone()) + .map_err(|e| format!("{}({:?}): {e}", $name, v))?; + if got != v { + return Err(format!("{}({:?}) returned {:?}", $name, v, got)); + } + }}; + } + rt!("RoundTripHostI32", i32, -7); + rt!("RoundTripHostU32", u32, 0xdead_beef); + rt!("RoundTripHostI64", i64, i64::MIN); + rt!("RoundTripHostU64", u64, u64::MAX); + rt!("RoundTripHostF32", f32, -1.25); + rt!("RoundTripHostF64", f64, 1234.5); + rt!("RoundTripHostBool", bool, false); + rt!("RoundTripHostString", String, "round-trip".to_string()); + rt!("RoundTripHostVecBytes", Vec, vec![0u8, 1, 2, 3, 0xff]); + Ok(()) +} + +/// Snapshot-from-loaded-snapshot path. Mutates state on the loaded +/// call golden, takes a fresh snapshot, round-trips it through an +/// OCI layout on disk, and asserts the mutation survives. 
+fn call_chained_snapshot(sbox: &mut MultiUseSandbox) -> Result<(), String> { + let val: i32 = sbox + .call("AddToStatic", 5i32) + .map_err(|e| format!("AddToStatic: {e}"))?; + if val != CALL_COUNTER_BUMP + 5 { + return Err(format!( + "AddToStatic returned {val}, expected {}", + CALL_COUNTER_BUMP + 5, + )); + } + let snap = sbox + .snapshot() + .map_err(|e| format!("take chained snapshot: {e}"))?; + + let tmp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?; + let layout = tmp.path().join("chained"); + let tag = "chained"; + snap.to_oci(&layout, tag) + .map_err(|e| format!("to_oci: {e}"))?; + + let loaded = Snapshot::from_oci(&layout, tag).map_err(|e| format!("from_oci: {e}"))?; + let mut funcs = HostFunctions::default(); + register_host_echo_fns(&mut funcs); + let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None) + .map_err(|e| format!("from_snapshot: {e}"))?; + let val: i32 = sbox2 + .call("GetStatic", ()) + .map_err(|e| format!("GetStatic on chained: {e}"))?; + if val != CALL_COUNTER_BUMP + 5 { + return Err(format!( + "chained snapshot observed COUNTER={val}, expected {}", + CALL_COUNTER_BUMP + 5, + )); + } + Ok(()) +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs b/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs new file mode 100644 index 000000000..0755d0718 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs @@ -0,0 +1,140 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Canonical fixture builders. These define exactly what bytes a +//! goldens push contains. Any change here is a snapshot content +//! change and requires a goldens regen. + +use std::sync::Arc; + +use hyperlight_host::func::Registerable; +use hyperlight_host::sandbox::SandboxConfiguration; +use hyperlight_host::sandbox::snapshot::Snapshot; +use hyperlight_host::sandbox::uninitialized::GuestEnvironment; +use hyperlight_host::{GuestBinary, MultiUseSandbox, UninitializedSandbox}; +use hyperlight_testing::simple_guest_as_string; + +/// Init data bytes baked into the init golden. Loaded back via +/// `ReadFromUserMemory` to assert byte-for-byte round-trip. +pub const INIT_DATA: &[u8] = b"hyperlight-snapshot-golden-init-data\0"; + +/// Heap pattern length used by the call golden. Small enough to +/// stay cheap, large enough to exercise non-trivial heap state. +pub const HEAP_PATTERN_LEN: u64 = 1024; + +/// Value the captured `COUNTER` static must hold in the call +/// golden. Set by `AddToStatic(CALL_COUNTER_BUMP)` at generate +/// time. +pub const CALL_COUNTER_BUMP: i32 = 42; + +/// Canonical `SandboxConfiguration` used to produce the goldens. +/// Layout knobs are deliberately bumped away from defaults so any +/// silent arithmetic change in `SandboxMemoryLayout::new` shifts at +/// least one region between generate-time and load-time. 
+fn golden_config() -> SandboxConfiguration { + let mut cfg = SandboxConfiguration::default(); + cfg.set_input_data_size(64 * 1024); + cfg.set_output_data_size(64 * 1024); + cfg.set_heap_size(256 * 1024); + cfg.set_scratch_size(512 * 1024); + cfg +} + +fn simpleguest_path() -> String { + simple_guest_as_string().expect("simpleguest_path") +} + +pub fn generate(kind: crate::platform::Kind) -> Arc { + match kind { + crate::platform::Kind::Init => generate_init(), + crate::platform::Kind::Call => generate_call(), + } +} + +pub fn generate_init() -> Arc { + let env = GuestEnvironment::new(GuestBinary::FilePath(simpleguest_path()), Some(INIT_DATA)); + Arc::new(Snapshot::from_env(env, golden_config()).expect("Snapshot::from_env (init)")) +} + +pub fn generate_call() -> Arc { + let mut u = UninitializedSandbox::new( + GuestBinary::FilePath(simpleguest_path()), + Some(golden_config()), + ) + .expect("UninitializedSandbox::new"); + register_host_echo_fns(&mut u); + let mut sbox = u.evolve().expect("evolve"); + run_canonical_calls(&mut sbox); + sbox.snapshot().expect("snapshot") +} + +/// Deterministic sequence of guest calls that mutate captured state +/// before snapshotting. Each call lands a specific bit of state +/// (BSS, heap, host-call wiring) that one of the per-surface +/// checks then asserts on after the golden is loaded. +fn run_canonical_calls(sbox: &mut MultiUseSandbox) { + let bumped: i32 = sbox + .call("AddToStatic", CALL_COUNTER_BUMP) + .expect("AddToStatic"); + assert_eq!(bumped, CALL_COUNTER_BUMP); + + let _: () = sbox + .call("AllocAndWritePattern", HEAP_PATTERN_LEN) + .expect("AllocAndWritePattern"); + + // Drive every host fn once so the captured host_function_details + // blob has known signatures, and any regression in host-dispatch + // surfaces at generate time rather than only during golden load. 
+ let _: i32 = sbox.call("RoundTripHostI32", 1234i32).expect("RTH i32"); + let _: u32 = sbox.call("RoundTripHostU32", 4321u32).expect("RTH u32"); + let _: i64 = sbox.call("RoundTripHostI64", -42i64).expect("RTH i64"); + let _: u64 = sbox.call("RoundTripHostU64", 1u64 << 40).expect("RTH u64"); + let _: f32 = sbox.call("RoundTripHostF32", 3.5f32).expect("RTH f32"); + let _: f64 = sbox.call("RoundTripHostF64", -2.25f64).expect("RTH f64"); + let _: bool = sbox.call("RoundTripHostBool", true).expect("RTH bool"); + let _: String = sbox + .call("RoundTripHostString", "hi".to_string()) + .expect("RTH string"); + let _: Vec = sbox + .call("RoundTripHostVecBytes", vec![1u8, 2, 3]) + .expect("RTH vec"); +} + +/// Register the `HostEcho*` family used by the call golden. Same +/// helper is used both at generate time (against +/// `UninitializedSandbox`) and at load time (against +/// `HostFunctions`) so the registered set matches the captured +/// `host_function_details`. +pub fn register_host_echo_fns(r: &mut R) { + r.register_host_function("HostEchoI32", |v: i32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoU32", |v: u32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoI64", |v: i64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoU64", |v: u64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoF32", |v: f32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoF64", |v: f64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoBool", |v: bool| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoString", |v: String| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoVecBytes", |v: Vec| Ok(v)) + .unwrap(); +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/main.rs b/src/hyperlight_host/tests/snapshot_goldens/main.rs new file mode 100644 index 000000000..c0c720bab --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/main.rs @@ -0,0 +1,123 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Snapshot goldens custom-harness test binary. +//! +//! Default mode runs the libtest-mimic harness with one trial per +//! row in `checks::CHECKS`, loading each kind's golden from +//! `target/snapshot-goldens-cache/{version}/{tag}/`. The +//! `generate [out-dir]` subcommand writes the canonical snapshots +//! for the local platform as OCI Image Layouts under `out-dir`, +//! defaulting to the verify cache for a local round-trip. +//! +//! Populate the cache with `just snapshot-goldens-pull` or +//! `just snapshot-goldens-generate`. Set `HYPERLIGHT_GOLDENS_HV` +//! to force the hypervisor name when more than one is available. 
+ +use std::path::{Path, PathBuf}; +use std::process::ExitCode; +use std::sync::Arc; + +use hyperlight_host::sandbox::snapshot::Snapshot; +use hyperlight_host::{HostFunctions, MultiUseSandbox}; +use libtest_mimic::{Arguments, Failed, Trial}; + +mod checks; +mod fixtures; +mod oci; +mod platform; + +use checks::Check; +use platform::{Kind, Platform}; + +fn main() -> ExitCode { + let mut argv = std::env::args().skip(1); + if argv.next().as_deref() == Some("generate") { + let out = argv + .next() + .map(PathBuf::from) + .unwrap_or_else(oci::cache_root); + return run_generate(&out); + } + run_verify() +} + +fn run_verify() -> ExitCode { + let args = Arguments::from_args(); + let Some(platform) = Platform::detect() else { + eprintln!( + "snapshot goldens: skipping verify: no (hypervisor, cpu, profile) platform detected on this host", + ); + return ExitCode::SUCCESS; + }; + println!( + "snapshot goldens: verifying platform={} version={}", + platform.suffix(), + platform::GOLDENS_VERSION, + ); + let trials = checks::CHECKS.iter().map(|c| trial(&platform, c)).collect(); + libtest_mimic::run(&args, trials).exit_code() +} + +fn trial(platform: &Platform, check: &'static Check) -> Trial { + let tag = platform.tag(check.kind); + Trial::test(check.name, move || { + let dir = oci::golden_dir(&tag).map_err(Failed::from)?; + let mut sbox = load_sandbox(&dir, &tag, check.kind).map_err(Failed::from)?; + (check.run)(&mut sbox).map_err(Failed::from) + }) +} + +fn load_sandbox(golden_dir: &Path, tag: &str, kind: Kind) -> Result { + let snap = Snapshot::from_oci(golden_dir, tag) + .map_err(|e| format!("Snapshot::from_oci({tag}): {e}"))?; + let mut funcs = HostFunctions::default(); + if matches!(kind, Kind::Call) { + fixtures::register_host_echo_fns(&mut funcs); + } + MultiUseSandbox::from_snapshot(Arc::new(snap), funcs, None) + .map_err(|e| format!("MultiUseSandbox::from_snapshot({tag}): {e}")) +} + +fn run_generate(out_dir: &Path) -> ExitCode { + let Some(platform) = Platform::detect() 
else { + eprintln!( + "snapshot goldens: generate: no (hypervisor, cpu, profile) platform detected on this host", + ); + return ExitCode::FAILURE; + }; + if let Err(e) = std::fs::create_dir_all(out_dir) { + eprintln!("snapshot goldens: generate: create {out_dir:?}: {e}"); + return ExitCode::FAILURE; + } + println!( + "snapshot goldens: generating platform={} version={} into {}", + platform.suffix(), + platform::GOLDENS_VERSION, + out_dir.display(), + ); + for kind in [Kind::Init, Kind::Call] { + let tag = platform.tag(kind); + let dir = out_dir.join(&tag); + let snap = fixtures::generate(kind); + if let Err(e) = snap.to_oci(&dir, &tag) { + eprintln!("snapshot goldens: generate: to_oci({tag}): {e}"); + return ExitCode::FAILURE; + } + println!(" wrote {tag} -> {}", dir.display()); + } + ExitCode::SUCCESS +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/oci.rs b/src/hyperlight_host/tests/snapshot_goldens/oci.rs new file mode 100644 index 000000000..de378b8c1 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/oci.rs @@ -0,0 +1,54 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! On-disk cache lookup for golden OCI Image Layouts. + +use std::path::PathBuf; + +use crate::platform::GOLDENS_VERSION; + +pub fn cache_root() -> PathBuf { + // Workspace target dir is two levels up from this crate. 
+ let target = std::env::var_os("CARGO_TARGET_DIR") + .map(PathBuf::from) + .unwrap_or_else(|| { + let raw = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("target"); + std::fs::canonicalize(&raw).unwrap_or(raw) + }); + target.join("snapshot-goldens-cache").join(GOLDENS_VERSION) +} + +fn cache_dir_for(tag: &str) -> PathBuf { + cache_root().join(tag) +} + +/// Locate the golden OCI Image Layout for `tag` in the local +/// cache. A missing layout is an error with guidance to populate +/// the cache. +pub fn golden_dir(tag: &str) -> Result { + let dir = cache_dir_for(tag); + if dir.join("oci-layout").is_file() { + return Ok(dir); + } + Err(format!( + "no golden OCI layout found at {dir:?} for tag `{tag}`. \ + Run `just snapshot-goldens-pull` to fetch the published goldens, \ + or `just snapshot-goldens-generate` to regenerate them locally.", + )) +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/platform.rs b/src/hyperlight_host/tests/snapshot_goldens/platform.rs new file mode 100644 index 000000000..c0aa10cf1 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/platform.rs @@ -0,0 +1,190 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Local platform detection and tag naming for snapshot goldens. +//! +//! A snapshot is not portable across `(hypervisor, cpu vendor, +//! build profile)`. Each such triple gets its own set of tags, +//! 
named `{GOLDENS_VERSION}-{hv}-{cpu}-{profile}-{kind}`. + +/// Goldens version. Follows a `vMAJOR.MINOR` scheme. Bump MAJOR when +/// the snapshot ABI changes (anything that invalidates older +/// snapshots: ABI bump, media type bump, layout arithmetic changes, +/// captured-register changes). Bump MINOR when the set of `CHECKS` +/// changes but the ABI does not. See `docs/snapshot-versioning.md`. +/// +/// The compile-time assertions in +/// `hyperlight_host::sandbox::snapshot::tripwires` +/// (`SNAPSHOT_ABI_VERSION` and the manifest media types) pin the +/// known ABI surface against this version's goldens. +pub const GOLDENS_VERSION: &str = "v1.0"; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Kind { + Init, + Call, +} + +impl Kind { + pub fn as_str(self) -> &'static str { + match self { + Self::Init => "init", + Self::Call => "call", + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Hypervisor { + Kvm, + Mshv, + Whp, +} + +impl Hypervisor { + fn as_str(self) -> &'static str { + match self { + Self::Kvm => "kvm", + Self::Mshv => "mshv", + Self::Whp => "whp", + } + } + + /// Detect the locally available hypervisor. Order matches the + /// host crate's preference: `/dev/mshv` over `/dev/kvm` on + /// Linux, WHP on Windows. `HYPERLIGHT_GOLDENS_HV` overrides on + /// hosts that have more than one available. 
+ fn detect() -> Option { + if let Some(v) = std::env::var_os("HYPERLIGHT_GOLDENS_HV") { + return match v.to_string_lossy().as_ref() { + "kvm" => Some(Self::Kvm), + "mshv" => Some(Self::Mshv), + "whp" => Some(Self::Whp), + _ => None, + }; + } + #[cfg(target_os = "linux")] + { + if std::path::Path::new("/dev/mshv").exists() { + return Some(Self::Mshv); + } + if std::path::Path::new("/dev/kvm").exists() { + return Some(Self::Kvm); + } + None + } + #[cfg(target_os = "windows")] + { + Some(Self::Whp) + } + #[cfg(not(any(target_os = "linux", target_os = "windows")))] + { + None + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum CpuVendor { + Intel, + Amd, +} + +impl CpuVendor { + fn as_str(self) -> &'static str { + match self { + Self::Intel => "intel", + Self::Amd => "amd", + } + } + + /// Detect the local CPU vendor via the `0` leaf of `cpuid`. + /// Returns `None` on non-`x86_64` targets or unknown vendor + /// strings. + fn detect() -> Option { + #[cfg(target_arch = "x86_64")] + { + // SAFETY: cpuid leaf 0 is always available on x86_64. 
+ let r = unsafe { core::arch::x86_64::__cpuid(0) }; + let mut bytes = [0u8; 12]; + bytes[0..4].copy_from_slice(&r.ebx.to_le_bytes()); + bytes[4..8].copy_from_slice(&r.edx.to_le_bytes()); + bytes[8..12].copy_from_slice(&r.ecx.to_le_bytes()); + match &bytes { + b"GenuineIntel" => Some(Self::Intel), + b"AuthenticAMD" => Some(Self::Amd), + _ => None, + } + } + #[cfg(not(target_arch = "x86_64"))] + { + None + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Profile { + Debug, + Release, +} + +impl Profile { + fn as_str(self) -> &'static str { + match self { + Self::Debug => "debug", + Self::Release => "release", + } + } + + fn detect() -> Self { + if cfg!(debug_assertions) { + Self::Debug + } else { + Self::Release + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct Platform { + hv: Hypervisor, + cpu: CpuVendor, + profile: Profile, +} + +impl Platform { + pub fn detect() -> Option { + Some(Self { + hv: Hypervisor::detect()?, + cpu: CpuVendor::detect()?, + profile: Profile::detect(), + }) + } + + pub fn suffix(&self) -> String { + format!( + "{}-{}-{}", + self.hv.as_str(), + self.cpu.as_str(), + self.profile.as_str(), + ) + } + + pub fn tag(&self, kind: Kind) -> String { + format!("{}-{}-{}", GOLDENS_VERSION, self.suffix(), kind.as_str()) + } +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 8cc88faa1..bb7a110a9 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -387,6 +387,149 @@ fn get_size_prefixed_buffer(data: Vec) -> Vec { data } +// --- Type-coverage echoes (used by snapshot golden tests F6) --- +// +// One guest function per primitive parameter/return type, so the +// guest-call wire format for each variant of `ParameterValue` / +// `ReturnValue` is exercised by a single round-trip call. 
+//
+// Strings (`Echo`), `Vec<u8>` (`GetSizePrefixedBuffer`), `f32`
+// (`EchoFloat`) and `f64` (`EchoDouble`) are already covered above.
+// The `unit` return type is exercised by `NoOp` below.
+
+#[guest_function("EchoI32")]
+fn echo_i32(v: i32) -> i32 {
+    v
+}
+
+#[guest_function("EchoU32")]
+fn echo_u32(v: u32) -> u32 {
+    v
+}
+
+#[guest_function("EchoI64")]
+fn echo_i64(v: i64) -> i64 {
+    v
+}
+
+#[guest_function("EchoU64")]
+fn echo_u64(v: u64) -> u64 {
+    v
+}
+
+#[guest_function("EchoBool")]
+fn echo_bool(v: bool) -> bool {
+    v
+}
+
+#[guest_function("NoOp")]
+fn no_op() {}
+
+// --- Host-fn round trips (used by snapshot golden tests F7) ---
+//
+// One host function per primitive type. The host registers each;
+// the guest invokes each via a `RoundTripHostT` wrapper so the test
+// can verify the value round-trips through the host. This exercises
+// the persisted `HostFunctionDetails` flatbuffer schema for every
+// primitive type (both as parameter and as return).
+
+#[host_function("HostEchoI32")]
+fn host_echo_i32(v: i32) -> Result<i32>;
+
+#[host_function("HostEchoU32")]
+fn host_echo_u32(v: u32) -> Result<u32>;
+
+#[host_function("HostEchoI64")]
+fn host_echo_i64(v: i64) -> Result<i64>;
+
+#[host_function("HostEchoU64")]
+fn host_echo_u64(v: u64) -> Result<u64>;
+
+#[host_function("HostEchoF32")]
+fn host_echo_f32(v: f32) -> Result<f32>;
+
+#[host_function("HostEchoF64")]
+fn host_echo_f64(v: f64) -> Result<f64>;
+
+#[host_function("HostEchoBool")]
+fn host_echo_bool(v: bool) -> Result<bool>;
+
+#[host_function("HostEchoString")]
+fn host_echo_string(v: String) -> Result<String>;
+
+#[host_function("HostEchoVecBytes")]
+fn host_echo_vec_bytes(v: Vec<u8>) -> Result<Vec<u8>>;
+
+#[guest_function("RoundTripHostI32")]
+fn round_trip_host_i32(v: i32) -> Result<i32> {
+    host_echo_i32(v)
+}
+
+#[guest_function("RoundTripHostU32")]
+fn round_trip_host_u32(v: u32) -> Result<u32> {
+    host_echo_u32(v)
+}
+
+#[guest_function("RoundTripHostI64")]
+fn round_trip_host_i64(v: i64) -> Result<i64> {
+    host_echo_i64(v)
+}
+
+#[guest_function("RoundTripHostU64")] +fn round_trip_host_u64(v: u64) -> Result { + host_echo_u64(v) +} + +#[guest_function("RoundTripHostF32")] +fn round_trip_host_f32(v: f32) -> Result { + host_echo_f32(v) +} + +#[guest_function("RoundTripHostF64")] +fn round_trip_host_f64(v: f64) -> Result { + host_echo_f64(v) +} + +#[guest_function("RoundTripHostBool")] +fn round_trip_host_bool(v: bool) -> Result { + host_echo_bool(v) +} + +#[guest_function("RoundTripHostString")] +fn round_trip_host_string(v: String) -> Result { + host_echo_string(v) +} + +#[guest_function("RoundTripHostVecBytes")] +fn round_trip_host_vec_bytes(v: Vec) -> Result> { + host_echo_vec_bytes(v) +} + +// --- Heap pattern (used by snapshot golden test F5) --- +// +// `AllocAndWritePattern(len)` allocates a `Vec` of length `len`, +// writes a deterministic byte pattern into it, and pins it in a +// static so the heap allocation survives the snapshot. +// `ReadPattern()` returns whatever is currently pinned. The test +// snapshots between the two calls and asserts the bytes round-trip +// across the on-disk save/load. + +static mut HEAP_PATTERN: Option> = None; + +#[guest_function("AllocAndWritePattern")] +fn alloc_and_write_pattern(len: u64) { + let v: Vec = (0..len as usize).map(|i| (i & 0xff) as u8).collect(); + unsafe { HEAP_PATTERN = Some(v) }; +} + +#[guest_function("ReadPattern")] +fn read_pattern() -> Vec { + #[allow(static_mut_refs)] + unsafe { + HEAP_PATTERN.clone().unwrap_or_default() + } +} + #[expect( clippy::empty_loop, reason = "This function is used to keep the CPU busy"