From b1ccf447ed12c99245cdbca206b3154baef5eb44 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 16:01:13 -0700 Subject: [PATCH 1/3] Add OCI snapshot persistence Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- CHANGELOG.md | 3 + Cargo.lock | 186 +++++ docs/snapshot-oci-format.md | 96 +++ src/hyperlight_host/Cargo.toml | 5 +- src/hyperlight_host/src/mem/shared_mem.rs | 1 - .../src/sandbox/initialized_multi_use.rs | 14 + .../src/sandbox/snapshot/file/config.rs | 549 ++++++++++++++ .../src/sandbox/snapshot/file/digest.rs | 132 ++++ .../src/sandbox/snapshot/file/fsutil.rs | 101 +++ .../src/sandbox/snapshot/file/media_types.rs | 34 + .../src/sandbox/snapshot/file/mod.rs | 693 ++++++++++++++++++ .../src/sandbox/snapshot/mod.rs | 3 + 12 files changed, 1815 insertions(+), 2 deletions(-) create mode 100644 docs/snapshot-oci-format.md create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file/config.rs create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file/digest.rs create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file/fsutil.rs create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file/mod.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 53fef075d..42f7e3c9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
## [Prerelease] - Unreleased +### Added +* `Snapshot::to_oci`, `Snapshot::from_oci`, and `Snapshot::from_oci_unchecked` for persisting and loading sandbox snapshots as OCI Image Layout directories by @ludfjig in https://github.com/hyperlight-dev/hyperlight/pull/1465 + ## [v0.15.0] - 2026-05-06 ### Added diff --git a/Cargo.lock b/Cargo.lock index d86f0fbcb..6b86b7aa6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -530,6 +530,27 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "const_format" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e" +dependencies = [ + "const_format_proc_macros", + "konst", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -730,6 +751,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "derive_arbitrary" version = "1.4.2" @@ -741,6 +797,37 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + [[package]] name = "digest" version = "0.10.7" @@ -1154,6 +1241,18 @@ dependencies = [ "wasip3", ] +[[package]] +name = "getset" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" +dependencies = [ + "proc-macro-error2", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "gimli" version = "0.33.0" @@ -1361,6 +1460,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.4.0" @@ -1585,6 +1690,7 @@ dependencies = [ "gdbstub", "gdbstub_arch", "goblin", + "hex", "hyperlight-common", 
"hyperlight-component-macro", "hyperlight-guest-tracing", @@ -1600,6 +1706,7 @@ dependencies = [ "metrics-util", "mshv-bindings", "mshv-ioctls", + "oci-spec", "opentelemetry", "opentelemetry-otlp", "opentelemetry-semantic-conventions", @@ -1612,6 +1719,7 @@ dependencies = [ "serde", "serde_json", "serial_test", + "sha2", "signal-hook-registry", "tempfile", "termcolor", @@ -1789,6 +1897,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -1912,6 +2026,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "konst" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb" +dependencies = [ + "konst_macro_rules", +] + +[[package]] +name = "konst_macro_rules" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" + [[package]] name = "kurbo" version = "0.11.3" @@ -2332,6 +2461,23 @@ dependencies = [ "ruzstd", ] +[[package]] +name = "oci-spec" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3da52b83ce3258fbf29f66ac784b279453c2ac3c22c5805371b921ede0d308" +dependencies = [ + "const_format", + "derive_builder", + "getset", + "regex", + "serde", + "serde_json", + "strum", + "strum_macros", + "thiserror", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -2780,6 +2926,28 @@ dependencies = [ "toml_edit", ] +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +dependencies = [ + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -3432,6 +3600,24 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "syn" version = "2.0.117" diff --git a/docs/snapshot-oci-format.md b/docs/snapshot-oci-format.md new file mode 100644 index 000000000..f7a701bc9 --- /dev/null +++ b/docs/snapshot-oci-format.md @@ -0,0 +1,96 @@ +# Hyperlight snapshot on-disk format + +Hyperlight serialises a `Snapshot` to disk as an [OCI Image Layout] +directory. `Snapshot::to_oci` writes one. `Snapshot::from_oci` and +`Snapshot::from_oci_unchecked` read one back. 
+ +[OCI Image Layout]: https://github.com/opencontainers/image-spec/blob/main/image-layout.md + +## Directory layout + +```text +path/ + oci-layout {"imageLayoutVersion":"1.0.0"} + index.json one manifest descriptor per tag, + tagged via the OCI standard + `org.opencontainers.image.ref.name` + annotation + blobs/sha256/ + <manifest-sha256> OCI image manifest JSON + <config-sha256> Hyperlight config JSON + <memory-sha256> raw memory bytes + (`memory_size` bytes) +``` + +Three blob kinds per tag: + +* **manifest** (`application/vnd.oci.image.manifest.v1+json`). Tiny JSON + pointer record selected via `index.json`. References one config and + one layer by digest. +* **config** (`application/vnd.hyperlight.snapshot.config.v1+json`). The + snapshot descriptor: arch, hypervisor, ABI version, entrypoint sregs, memory + layout, registered host functions, snapshot generation counter. + Loaded eagerly and fully parsed. +* **layer / memory** (`application/vnd.hyperlight.snapshot.memory.v1`). + The raw guest memory image, exactly `memory_size` bytes. mmap'd on + restore. + +Blob filenames are the lowercase hex sha256 of the blob bytes, so identical blobs +across tags are stored once. + +## What is one snapshot + +A single saved `Snapshot` consists of exactly: + +* one entry in `index.json`, carrying the `tag` as + `org.opencontainers.image.ref.name`, +* one **manifest** blob (referenced by that index entry), +* one **config** blob (referenced by the manifest's `config` field), +* one **layer** blob (the only entry in the manifest's `layers` + array, holding the raw memory image). + +Saving two snapshots under different tags into the same `path` +produces two index entries and two manifests. Configs and layers are +deduplicated by content, so identical bytes are stored once and +referenced by both manifests. + +Saving the same tag a second time replaces that tag's index entry +and writes a fresh manifest. The previous manifest, and any of its +config or layer blobs that no other tag references, become orphans +in `blobs/sha256/`.
+ +## Write semantics + +`Snapshot::to_oci(path, tag)` opens or creates the OCI layout at +`path` and writes one snapshot under `tag`. The parent directory of +`path` must already exist. `path` itself is created if absent. An +existing layout at `path` is preserved: other tags are kept, and a +tag equal to `tag` is replaced. + +`index.json` is rewritten by writing a temporary file and renaming it into place. That `rename` is the commit +point for the whole operation. A crash before that rename leaves the +prior layout intact. A crash after it leaves the new layout intact. + +Replaced tags leave orphan blobs behind. To compact, remove the +directory and re-save every tag you still need. Concurrent writers to the same `path` are +unsupported. + +This mirrors the merge behaviour of `containers/image` (skopeo, +podman), `go-containerregistry` (crane), and `regclient`. + +## Read semantics + +`Snapshot::from_oci(path, tag)` verifies sha256 for manifest, config, +and memory (layer) blobs. `Snapshot::from_oci_unchecked` skips the digest +verification, trading integrity for performance, and keeps every +other check (OCI structure, descriptor sizes, schema versions, arch / +hypervisor / ABI tags, layout bounds, entrypoint bounds). + +A missing tag or duplicate tag in `index.json` is rejected. + +## Portability + +Snapshot images are bound to a specific CPU architecture and +hypervisor. Both are recorded in the config blob and checked at load +time, and a mismatch is rejected with a clear error. The hypervisor +tag (`kvm`, `mshv`, `whp`) constrains the host OS.
diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 7e709b449..1eb3571f5 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -48,9 +48,13 @@ thiserror = "2.0.18" chrono = { version = "0.4", optional = true } anyhow = "1.0" metrics = "0.24.6" +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" elfcore = { version = "2.0", optional = true } uuid = { version = "1.23.2", features = ["v4"] } +oci-spec = { version = "0.8", default-features = false, features = ["image"] } +sha2 = "0.10" +hex = "0.4" [target.'cfg(windows)'.dependencies] windows = { version = "0.62", features = [ @@ -82,7 +86,6 @@ mshv-ioctls = { version = "0.6", optional = true} [dev-dependencies] uuid = { version = "1.23.2", features = ["v4"] } signal-hook-registry = "1.4.8" -serde = "1.0" iced-x86 = { version = "1.21", default-features = false, features = ["std", "code_asm"] } proptest = "1.11.0" tempfile = "3.27.0" diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index d9c69de2e..e5c224b22 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -1568,7 +1568,6 @@ impl ReadonlySharedMemory { /// The file's length must be a non-zero multiple of `PAGE_SIZE`. /// `guest_mapped_size` must be a non-zero multiple of `PAGE_SIZE` /// no greater than the file's length. 
- #[cfg_attr(not(test), expect(dead_code))] pub(crate) fn from_file(file: &std::fs::File, guest_mapped_size: usize) -> Result { let len: usize = file .metadata() diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 62a4600d0..4cab2acbb 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -185,6 +185,20 @@ impl MultiUseSandbox { /// # Ok(()) /// # } /// ``` + /// + /// From a snapshot loaded from disk: + /// + /// ```no_run + /// # use std::sync::Arc; + /// # use hyperlight_host::{HostFunctions, MultiUseSandbox}; + /// # use hyperlight_host::sandbox::snapshot::Snapshot; + /// # fn example() -> Result<(), Box> { + /// let snapshot = Arc::new(Snapshot::from_oci("./guest_snapshot", "latest")?); + /// let mut sandbox = MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None)?; + /// let result: String = sandbox.call("Echo", "hello".to_string())?; + /// # Ok(()) + /// # } + /// ``` #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] pub fn from_snapshot( snapshot: Arc, diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/config.rs b/src/hyperlight_host/src/sandbox/snapshot/file/config.rs new file mode 100644 index 000000000..04a7c1b45 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file/config.rs @@ -0,0 +1,549 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +use hyperlight_common::flatbuffer_wrappers::function_types::{ParameterType, ReturnType}; +use hyperlight_common::flatbuffer_wrappers::host_function_definition::HostFunctionDefinition; +use hyperlight_common::vmem::PAGE_SIZE; +use serde::{Deserialize, Serialize}; + +use super::media_types::SNAPSHOT_ABI_VERSION; +use crate::hypervisor::regs::{CommonSegmentRegister, CommonSpecialRegisters, CommonTableRegister}; +use crate::mem::layout::SandboxMemoryLayout; +use crate::mem::memory_region::MemoryRegionFlags; + +// --- Arch and hypervisor identifiers -------------------------------- + +/// Guest architecture the snapshot was captured for. Checked on load +/// against the running host. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub(super) enum Arch { + X86_64, + Aarch64, + I686, +} + +impl Arch { + pub(super) fn current() -> Self { + #[cfg(feature = "i686-guest")] + { + Self::I686 + } + #[cfg(all(not(feature = "i686-guest"), target_arch = "x86_64"))] + { + Self::X86_64 + } + #[cfg(all(not(feature = "i686-guest"), target_arch = "aarch64"))] + { + Self::Aarch64 + } + } +} + +/// Hypervisor backend the snapshot was captured under. Checked on +/// load because vCPU register state is backend-specific. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub(super) enum Hypervisor { + Kvm, + Mshv, + Whp, +} + +impl Hypervisor { + pub(super) fn current() -> Option { + #[allow(unused_imports)] + use crate::hypervisor::virtual_machine::HypervisorType; + use crate::hypervisor::virtual_machine::get_available_hypervisor; + + match get_available_hypervisor() { + #[cfg(kvm)] + Some(HypervisorType::Kvm) => Some(Self::Kvm), + #[cfg(mshv3)] + Some(HypervisorType::Mshv) => Some(Self::Mshv), + #[cfg(target_os = "windows")] + Some(HypervisorType::Whp) => Some(Self::Whp), + None => None, + } + } + + fn name(&self) -> &'static str { + match self { + Self::Kvm => "KVM", + Self::Mshv => "MSHV", + Self::Whp => "WHP", + } + } +} + +// --- Config JSON shape ---------------------------------------------- + +/// Top-level Hyperlight snapshot config JSON. Lives at +/// `blobs/sha256/` with media type +/// `application/vnd.hyperlight.snapshot.config.v1+json`. +/// +/// In OCI terms this is the "image config" blob that the manifest's +/// `config` descriptor points to. It describes the accompanying +/// memory layer (the snapshot bytes) and everything the loader needs +/// to reconstruct a runnable `Snapshot`. +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub(super) struct OciSnapshotConfig { + /// Hyperlight crate version that produced this config. Recorded + /// for diagnostics. Not checked on load. + pub(super) hyperlight_version: String, + pub(super) arch: Arch, + /// Memory blob ABI version. See [`SNAPSHOT_ABI_VERSION`]. + pub(super) abi_version: u32, + pub(super) hypervisor: Hypervisor, + /// Top of the guest stack, in guest virtual address space. + pub(super) stack_top_gva: u64, + pub(super) entrypoint: Entrypoint, + pub(super) layout: MemoryLayout, + /// Total size of the memory blob in bytes (including the guest + /// page-table tail, if any). Equal to `self.memory.mem_size()`. 
+ pub(super) memory_size: u64, + /// Names and signatures of host functions registered when this + /// snapshot was taken. Validated against the loader's registry. + pub(super) host_functions: Vec, + /// Generation counter for the snapshot. Restored verbatim into + /// the `Snapshot` so guest-visible bookkeeping at + /// `SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET` is continuous across + /// save/load. + pub(super) snapshot_generation: u64, +} + +/// What the loader should do with the restored sandbox: jump to the +/// guest entrypoint, or resume a paused call with captured sregs. +/// The enum shape enforces that `Call` carries sregs and `Initialise` +/// does not. +#[derive(Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "lowercase", deny_unknown_fields)] +pub(super) enum Entrypoint { + Initialise { addr: u64 }, + Call { addr: u64, sregs: Box }, +} + +/// Sizes and permissions of the regions inside the snapshot blob, +/// enough for the loader to rebuild a `SandboxMemoryLayout`. +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub(super) struct MemoryLayout { + pub(super) input_data_size: usize, + pub(super) output_data_size: usize, + pub(super) heap_size: usize, + pub(super) code_size: usize, + pub(super) init_data_size: usize, + /// Memory region flag bits. `None` means default permissions. + pub(super) init_data_permissions: Option, + pub(super) scratch_size: usize, + pub(super) snapshot_size: usize, + pub(super) pt_size: Option, +} + +/// Name and signature of one host function registered when the +/// snapshot was taken. The loader validates these against the +/// registry of the sandbox it is restoring into. +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub(super) struct HostFunction { + function_name: String, + parameter_types: Vec, + return_type: ReturnTypeRepr, +} + +/// JSON-friendly mirror of +/// [`hyperlight_common::flatbuffer_wrappers::function_types::ParameterType`]. 
+/// Kept local so we don't have to plumb serde through `hyperlight_common`. +/// The `match`es below are exhaustive: any new variant upstream forces +/// an explicit decision here. +#[derive(Serialize, Deserialize, Copy, Clone)] +#[serde(rename_all = "snake_case")] +enum ParameterTypeRepr { + Int, + UInt, + Long, + ULong, + Float, + Double, + String, + Bool, + VecBytes, +} + +/// JSON-friendly mirror of +/// [`hyperlight_common::flatbuffer_wrappers::function_types::ReturnType`]. +#[derive(Serialize, Deserialize, Copy, Clone)] +#[serde(rename_all = "snake_case")] +enum ReturnTypeRepr { + Int, + UInt, + Long, + ULong, + Float, + Double, + String, + Bool, + Void, + VecBytes, +} + +impl From<&ParameterType> for ParameterTypeRepr { + fn from(p: &ParameterType) -> Self { + match p { + ParameterType::Int => Self::Int, + ParameterType::UInt => Self::UInt, + ParameterType::Long => Self::Long, + ParameterType::ULong => Self::ULong, + ParameterType::Float => Self::Float, + ParameterType::Double => Self::Double, + ParameterType::String => Self::String, + ParameterType::Bool => Self::Bool, + ParameterType::VecBytes => Self::VecBytes, + } + } +} + +impl From for ParameterType { + fn from(r: ParameterTypeRepr) -> Self { + match r { + ParameterTypeRepr::Int => Self::Int, + ParameterTypeRepr::UInt => Self::UInt, + ParameterTypeRepr::Long => Self::Long, + ParameterTypeRepr::ULong => Self::ULong, + ParameterTypeRepr::Float => Self::Float, + ParameterTypeRepr::Double => Self::Double, + ParameterTypeRepr::String => Self::String, + ParameterTypeRepr::Bool => Self::Bool, + ParameterTypeRepr::VecBytes => Self::VecBytes, + } + } +} + +impl From<&ReturnType> for ReturnTypeRepr { + fn from(r: &ReturnType) -> Self { + match r { + ReturnType::Int => Self::Int, + ReturnType::UInt => Self::UInt, + ReturnType::Long => Self::Long, + ReturnType::ULong => Self::ULong, + ReturnType::Float => Self::Float, + ReturnType::Double => Self::Double, + ReturnType::String => Self::String, + ReturnType::Bool 
=> Self::Bool, + ReturnType::Void => Self::Void, + ReturnType::VecBytes => Self::VecBytes, + } + } +} + +impl From for ReturnType { + fn from(r: ReturnTypeRepr) -> Self { + match r { + ReturnTypeRepr::Int => Self::Int, + ReturnTypeRepr::UInt => Self::UInt, + ReturnTypeRepr::Long => Self::Long, + ReturnTypeRepr::ULong => Self::ULong, + ReturnTypeRepr::Float => Self::Float, + ReturnTypeRepr::Double => Self::Double, + ReturnTypeRepr::String => Self::String, + ReturnTypeRepr::Bool => Self::Bool, + ReturnTypeRepr::Void => Self::Void, + ReturnTypeRepr::VecBytes => Self::VecBytes, + } + } +} + +/// Captured x86_64 special registers for a paused vCPU. Round-trips +/// to/from [`CommonSpecialRegisters`] and is restored verbatim when +/// resuming a `Call` entrypoint. +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub(super) struct Sregs { + cs: SegmentRegister, + ds: SegmentRegister, + es: SegmentRegister, + fs: SegmentRegister, + gs: SegmentRegister, + ss: SegmentRegister, + tr: SegmentRegister, + ldt: SegmentRegister, + gdt: TableRegister, + idt: TableRegister, + cr0: u64, + cr2: u64, + cr3: u64, + cr4: u64, + cr8: u64, + efer: u64, + apic_base: u64, + interrupt_bitmap: [u64; 4], +} + +/// Serde mirror of [`CommonSegmentRegister`]. +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct SegmentRegister { + base: u64, + limit: u32, + selector: u16, + type_: u8, + present: u8, + dpl: u8, + db: u8, + s: u8, + l: u8, + g: u8, + avl: u8, + unusable: u8, + padding: u8, +} + +/// Serde mirror of [`CommonTableRegister`]. 
+#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct TableRegister { + base: u64, + limit: u16, +} + +// --- Conversions between repr and runtime types --------------------- + +impl From<&CommonSpecialRegisters> for Sregs { + fn from(s: &CommonSpecialRegisters) -> Self { + let seg = |r: &CommonSegmentRegister| SegmentRegister { + base: r.base, + limit: r.limit, + selector: r.selector, + type_: r.type_, + present: r.present, + dpl: r.dpl, + db: r.db, + s: r.s, + l: r.l, + g: r.g, + avl: r.avl, + unusable: r.unusable, + padding: r.padding, + }; + let tab = |r: &CommonTableRegister| TableRegister { + base: r.base, + limit: r.limit, + }; + Self { + cs: seg(&s.cs), + ds: seg(&s.ds), + es: seg(&s.es), + fs: seg(&s.fs), + gs: seg(&s.gs), + ss: seg(&s.ss), + tr: seg(&s.tr), + ldt: seg(&s.ldt), + gdt: tab(&s.gdt), + idt: tab(&s.idt), + cr0: s.cr0, + cr2: s.cr2, + cr3: s.cr3, + cr4: s.cr4, + cr8: s.cr8, + efer: s.efer, + apic_base: s.apic_base, + interrupt_bitmap: s.interrupt_bitmap, + } + } +} + +impl From for CommonSpecialRegisters { + fn from(r: Sregs) -> Self { + let seg = |s: SegmentRegister| CommonSegmentRegister { + base: s.base, + limit: s.limit, + selector: s.selector, + type_: s.type_, + present: s.present, + dpl: s.dpl, + db: s.db, + s: s.s, + l: s.l, + g: s.g, + avl: s.avl, + unusable: s.unusable, + padding: s.padding, + }; + let tab = |t: TableRegister| CommonTableRegister { + base: t.base, + limit: t.limit, + }; + Self { + cs: seg(r.cs), + ds: seg(r.ds), + es: seg(r.es), + fs: seg(r.fs), + gs: seg(r.gs), + ss: seg(r.ss), + tr: seg(r.tr), + ldt: seg(r.ldt), + gdt: tab(r.gdt), + idt: tab(r.idt), + cr0: r.cr0, + cr2: r.cr2, + cr3: r.cr3, + cr4: r.cr4, + cr8: r.cr8, + efer: r.efer, + apic_base: r.apic_base, + interrupt_bitmap: r.interrupt_bitmap, + } + } +} + +impl From<&HostFunctionDefinition> for HostFunction { + fn from(d: &HostFunctionDefinition) -> Self { + let parameter_types = d + .parameter_types + .as_ref() + .map(|v| 
v.iter().map(ParameterTypeRepr::from).collect()) + .unwrap_or_default(); + Self { + function_name: d.function_name.clone(), + parameter_types, + return_type: ReturnTypeRepr::from(&d.return_type), + } + } +} + +impl From for HostFunctionDefinition { + fn from(r: HostFunction) -> Self { + Self { + function_name: r.function_name, + parameter_types: Some(r.parameter_types.into_iter().map(Into::into).collect()), + return_type: r.return_type.into(), + } + } +} + +impl OciSnapshotConfig { + pub(super) fn validate_for_load(&self) -> crate::Result<()> { + if self.arch != Arch::current() { + return Err(crate::new_error!( + "snapshot architecture mismatch: file is {:?}, current host is {:?}", + self.arch, + Arch::current() + )); + } + if self.abi_version != SNAPSHOT_ABI_VERSION { + return Err(crate::new_error!( + "snapshot ABI version mismatch: file has version {}, this build expects {}. \ + The snapshot must be regenerated from the guest binary.", + self.abi_version, + SNAPSHOT_ABI_VERSION + )); + } + let current_hv = Hypervisor::current() + .ok_or_else(|| crate::new_error!("no hypervisor available to load snapshot"))?; + if self.hypervisor != current_hv { + return Err(crate::new_error!( + "snapshot hypervisor mismatch: file was created on {} but the current hypervisor is {}", + self.hypervisor.name(), + current_hv.name() + )); + } + // Bound memory size early so the subsequent file-size check + // does not have to deal with absurd values. + if self.memory_size == 0 || self.memory_size > SandboxMemoryLayout::MAX_MEMORY_SIZE as u64 { + return Err(crate::new_error!( + "snapshot memory_size ({}) is out of range", + self.memory_size + )); + } + if self.memory_size as usize % PAGE_SIZE != 0 { + return Err(crate::new_error!( + "snapshot memory_size ({}) is not a multiple of PAGE_SIZE", + self.memory_size + )); + } + // Invariant: `snapshot_size + pt_size == memory_size`. 
+ // `snapshot_size` is the guest-visible prefix of the blob, + // mapped into guest PA space at `BASE_ADDRESS`. `pt_size` + // is the page-table tail that sits after it in the blob and + // the host mapping, outside the guest mapping of the + // snapshot region. + if self.layout.snapshot_size == 0 { + return Err(crate::new_error!("snapshot snapshot_size must be nonzero")); + } + if self.layout.snapshot_size % PAGE_SIZE != 0 { + return Err(crate::new_error!( + "snapshot snapshot_size ({}) is not a multiple of PAGE_SIZE", + self.layout.snapshot_size + )); + } + let pt = self.layout.pt_size.unwrap_or(0); + if pt % PAGE_SIZE != 0 { + return Err(crate::new_error!( + "snapshot pt_size ({}) is not a multiple of PAGE_SIZE", + pt + )); + } + if (self.layout.snapshot_size as u64).saturating_add(pt as u64) != self.memory_size { + return Err(crate::new_error!( + "snapshot snapshot_size ({}) + pt_size ({}) does not equal memory_size ({})", + self.layout.snapshot_size, + pt, + self.memory_size + )); + } + if let Some(bits) = self.layout.init_data_permissions { + MemoryRegionFlags::from_bits(bits).ok_or_else(|| { + crate::new_error!( + "snapshot init_data_permissions {:#x} contains unknown flag bits", + bits + ) + })?; + } + + // Entrypoint address must point inside the guest snapshot + // region. Hyperlight identity-maps the snapshot region in low + // GPAs, so the same bounds apply to virtual and physical + // addresses there. A crafted config could otherwise direct + // execution into unmapped GPA space (which only catches the + // bug at vCPU run time) or, worse, into the scratch region + // (which is writable). The bound here is + // `[BASE_ADDRESS, BASE_ADDRESS + snapshot_size)` because the + // snapshot blob covers exactly the snapshot region. 
+ let snap_lo = SandboxMemoryLayout::BASE_ADDRESS as u64; + let snap_hi = snap_lo + .checked_add(self.layout.snapshot_size as u64) + .ok_or_else(|| { + crate::new_error!( + "snapshot layout overflow: BASE_ADDRESS + snapshot_size ({}) does not fit in u64", + self.layout.snapshot_size + ) + })?; + let entry_addr = match &self.entrypoint { + Entrypoint::Initialise { addr } => *addr, + Entrypoint::Call { addr, .. } => *addr, + }; + if entry_addr < snap_lo || entry_addr >= snap_hi { + return Err(crate::new_error!( + "snapshot entrypoint addr {:#x} is outside the snapshot region [{:#x}, {:#x})", + entry_addr, + snap_lo, + snap_hi + )); + } + Ok(()) + } +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/digest.rs b/src/hyperlight_host/src/sandbox/snapshot/file/digest.rs new file mode 100644 index 000000000..db70495a7 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file/digest.rs @@ -0,0 +1,132 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +use std::io::{Read, Seek, SeekFrom}; + +use oci_spec::image::Digest; +use sha2::{Digest as _, Sha256}; + +/// A `sha256:` digest as recorded in OCI manifests. The bare hex +/// (without prefix) is also the blob's filename inside `blobs/sha256/`. +#[derive(Clone)] +pub(super) struct Digest256 { + /// Lowercase hex of the 32-byte sha256 output. 
+ pub(super) hex: String, +} + +impl Digest256 { + pub(super) fn from_bytes(bytes: &[u8]) -> Self { + let arr: [u8; 32] = Sha256::digest(bytes).into(); + Self { + hex: hex::encode(arr), + } + } + + fn from_hasher(h: Sha256) -> Self { + let arr: [u8; 32] = h.finalize().into(); + Self { + hex: hex::encode(arr), + } + } +} + +/// Build an `oci_spec::image::Digest` from a [`Digest256`]. +pub(super) fn oci_digest(d: &Digest256) -> crate::Result { + Digest::try_from(format!("sha256:{}", d.hex)) + .map_err(|e| crate::new_error!("failed to construct OCI digest: {}", e)) +} + +pub(super) fn parse_oci_digest(s: &str) -> crate::Result { + let rest = s.strip_prefix("sha256:").ok_or_else(|| { + crate::new_error!( + "OCI descriptor digest {:?} is not a sha256 digest (only sha256 is supported)", + s + ) + })?; + // OCI image-spec pins sha256 encoding to `[a-f0-9]{64}`. Reject + // uppercase hex up front so we stay byte-compatible with + // containerd, oras, crane, and the Docker registry. + if rest.len() != 64 + || !rest + .bytes() + .all(|b| b.is_ascii_digit() || (b'a'..=b'f').contains(&b)) + { + return Err(crate::new_error!( + "OCI descriptor digest {:?} is not a 64-character lowercase hex string", + s + )); + } + Ok(rest.to_string()) +} + +/// Compute sha256 of `bytes` and verify it equals `expected_hex`. +/// Used to validate manifest and config blobs (small, already in +/// memory). +pub(super) fn verify_blob_bytes( + label: &str, + bytes: &[u8], + expected_hex: &str, +) -> crate::Result<()> { + let actual = Digest256::from_bytes(bytes); + if actual.hex != expected_hex { + return Err(crate::new_error!( + "{} blob digest mismatch: descriptor declares sha256:{}, file hashes to sha256:{}", + label, + expected_hex, + actual.hex + )); + } + Ok(()) +} + +/// Stream-hash an already-open file and verify its sha256 equals +/// `expected_hex`. +/// +/// Takes the same `File` handle the caller will subsequently `mmap`, +/// not a path. 
Hashing one open and mapping another is open-then- +/// replace TOCTOU bait. Seeks to start before and after so the +/// caller's file position is unchanged. +pub(super) fn verify_blob_file( + label: &str, + file: &mut std::fs::File, + expected_hex: &str, +) -> crate::Result<()> { + file.seek(SeekFrom::Start(0)) + .map_err(|e| crate::new_error!("failed to seek {} blob: {}", label, e))?; + let mut hasher = Sha256::new(); + let mut buf = [0u8; 64 * 1024]; + loop { + let n = file + .read(&mut buf) + .map_err(|e| crate::new_error!("failed to read {} blob: {}", label, e))?; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + } + file.seek(SeekFrom::Start(0)) + .map_err(|e| crate::new_error!("failed to rewind {} blob: {}", label, e))?; + let actual = Digest256::from_hasher(hasher); + if actual.hex != expected_hex { + return Err(crate::new_error!( + "{} blob digest mismatch: descriptor declares sha256:{}, file hashes to sha256:{}", + label, + expected_hex, + actual.hex + )); + } + Ok(()) +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/fsutil.rs b/src/hyperlight_host/src/sandbox/snapshot/file/fsutil.rs new file mode 100644 index 000000000..bd0bae63f --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file/fsutil.rs @@ -0,0 +1,101 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +use std::io::Read; +use std::path::Path; + +use super::digest::Digest256; + +/// Write `bytes` to `target` atomically: write to a sibling tmp +/// file in the same directory, fsync nothing extra, then `rename`. +/// On rename failure the tmp file is removed. +/// +/// The tmp name embeds pid and a monotonic-ish nanos suffix to keep +/// concurrent writers in the same directory from colliding on the +/// tmp path. Concurrent writers to the same `target` still race on +/// the final rename, which is the caller's contract to avoid. +pub(super) fn write_file_atomic(target: &Path, bytes: &[u8]) -> crate::Result<()> { + let parent = target.parent().unwrap_or(Path::new(".")); + let file_name = target.file_name().and_then(|s| s.to_str()).ok_or_else(|| { + crate::new_error!("atomic write: target {:?} has no UTF-8 file name", target) + })?; + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let tmp = parent.join(format!( + ".{}.tmp-{}-{}", + file_name, + std::process::id(), + nanos + )); + std::fs::write(&tmp, bytes) + .map_err(|e| crate::new_error!("atomic write: failed to write tmp {:?}: {}", tmp, e))?; + std::fs::rename(&tmp, target).map_err(|e| { + let _ = std::fs::remove_file(&tmp); + crate::new_error!( + "atomic write: failed to rename {:?} -> {:?}: {}", + tmp, + target, + e + ) + }) +} + +/// Write a content-addressed blob into `blobs_dir`, skipping the +/// write if a file at `blobs_dir/` is already present and has +/// the expected length. Skipping is safe because the filename is the +/// sha256 of the bytes: a name match implies a content match outside +/// of a hash collision. The size check defends against half-written +/// stragglers left over from a previous crash. 
+pub(super) fn write_blob_if_absent( + blobs_dir: &Path, + digest: &Digest256, + bytes: &[u8], +) -> crate::Result<()> { + let target = blobs_dir.join(&digest.hex); + if let Ok(meta) = std::fs::metadata(&target) + && meta.is_file() + && meta.len() == bytes.len() as u64 + { + return Ok(()); + } + write_file_atomic(&target, bytes) +} + +/// Read a file in full, refusing if the file is bigger than `max_size`. +/// +/// The cap is enforced on the actual byte stream via [`Read::take`], so files +/// whose `metadata().len()` is misleading cannot exceed the limit. +pub(super) fn read_bounded(path: &Path, max_size: u64) -> crate::Result> { + let f = std::fs::File::open(path) + .map_err(|e| crate::new_error!("failed to open {:?}: {}", path, e))?; + let hint = f.metadata().map(|m| m.len().min(max_size)).unwrap_or(0); + let mut buf = Vec::with_capacity(hint as usize); + // Read one extra byte so we can distinguish "exactly at the limit" from + // "over the limit" instead of silently truncating an oversize file. + f.take(max_size.saturating_add(1)) + .read_to_end(&mut buf) + .map_err(|e| crate::new_error!("failed to read {:?}: {}", path, e))?; + if buf.len() as u64 > max_size { + return Err(crate::new_error!( + "file {:?} exceeds maximum allowed {} bytes", + path, + max_size + )); + } + Ok(buf) +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs b/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs new file mode 100644 index 000000000..963369f58 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs @@ -0,0 +1,34 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
// Media types are versioned by suffix. The loader matches each
// version specifically (no `_CURRENT` shortcut on the read side); the
// writer always emits `_CURRENT`. A new version is added by:
//
// 1. Declare `MT_FOO_V2` next to `MT_FOO_V1`.
// 2. Point `MT_FOO_CURRENT` at `MT_FOO_V2`.
// 3. Add a dispatch arm in the loader that converts v1 -> v2 (or
//    rejects v1 if no compatibility window is offered).

/// Version 1 media type of the Hyperlight config blob (JSON).
pub(super) const MT_CONFIG_V1: &str = "application/vnd.hyperlight.snapshot.config.v1+json";
/// Config media type emitted by the writer; currently v1.
pub(super) const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1;
/// Version 1 media type of the snapshot memory blob.
pub(super) const MT_SNAPSHOT_V1: &str = "application/vnd.hyperlight.snapshot.memory.v1";
/// Snapshot memory media type emitted by the writer; currently v1.
pub(super) const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1;

/// ABI version for the snapshot memory blob. Bumped whenever the
/// host-guest contract for the bytes inside the snapshot blob changes
/// (PEB layout, calling convention, init state, etc.). Independent of
/// the config blob's media-type version.
pub(super) const SNAPSHOT_ABI_VERSION: u32 = 1;
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! OCI Image Layout serde for [`Snapshot`]. See +//! `docs/snapshot-oci-format.md` for the on-disk format. + +mod config; +mod digest; +mod fsutil; +mod media_types; + +use std::path::Path; + +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; +use hyperlight_common::vmem::PAGE_SIZE; +use oci_spec::image::{ + Descriptor, DescriptorBuilder, ImageIndex, ImageIndexBuilder, ImageManifest, + ImageManifestBuilder, MediaType, SCHEMA_VERSION, +}; + +use self::config::{ + Arch, Entrypoint, HostFunction, Hypervisor, MemoryLayout, OciSnapshotConfig, Sregs, +}; +use self::digest::{Digest256, oci_digest, parse_oci_digest, verify_blob_bytes, verify_blob_file}; +use self::fsutil::{read_bounded, write_blob_if_absent, write_file_atomic}; +use self::media_types::{ + MT_CONFIG_CURRENT, MT_CONFIG_V1, MT_SNAPSHOT_CURRENT, MT_SNAPSHOT_V1, SNAPSHOT_ABI_VERSION, +}; +use super::{NextAction, Snapshot}; +use crate::hypervisor::regs::CommonSpecialRegisters; +use crate::mem::layout::SandboxMemoryLayout; +use crate::mem::memory_region::MemoryRegionFlags; +use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; + +const OCI_LAYOUT_VERSION: &str = "1.0.0"; + +/// Maximum size of the config JSON blob. Bounds the allocation done +/// before we parse the JSON. +const MAX_CONFIG_BLOB_SIZE: u64 = 1024 * 1024; + +/// OCI standard annotation key for a manifest's tag inside an image +/// index. Set on the manifest descriptor in `index.json`, not on the +/// manifest blob itself. See the OCI Image Spec, "Annotations" and +/// the Image Layout spec. 
+const ANNOTATION_REF_NAME: &str = "org.opencontainers.image.ref.name"; + +/// Validate a tag against the OCI Distribution spec rules: +/// `[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`. Required so that the same +/// strings work both in our local layout and when pushed to a +/// registry via `oras` / `crane` / `skopeo`. +fn validate_tag(tag: &str) -> crate::Result<()> { + let bytes = tag.as_bytes(); + if bytes.is_empty() || bytes.len() > 128 { + return Err(crate::new_error!( + "tag {:?} is invalid: must be 1..=128 bytes", + tag + )); + } + let first = bytes[0]; + if !(first.is_ascii_alphanumeric() || first == b'_') { + return Err(crate::new_error!( + "tag {:?} is invalid: first character must be alphanumeric or '_'", + tag + )); + } + for &b in &bytes[1..] { + if !(b.is_ascii_alphanumeric() || b == b'_' || b == b'.' || b == b'-') { + return Err(crate::new_error!( + "tag {:?} is invalid: characters after the first must be \ + alphanumeric or one of '_', '.', '-'", + tag + )); + } + } + Ok(()) +} + +impl Snapshot { + /// Save this snapshot into the OCI Image Layout directory at + /// `path` under `tag`. + /// + /// `tag` is written to `index.json` as + /// `org.opencontainers.image.ref.name` and must satisfy the OCI + /// tag grammar (`[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`). + /// + /// The parent directory of `path` must already exist. `path` + /// itself is created if absent. If a layout already exists at + /// `path`, this call appends to it: other tags in `index.json` + /// are kept untouched, and a manifest descriptor whose + /// `org.opencontainers.image.ref.name` annotation equals `tag` + /// is replaced. Blobs are content-addressed and shared across + /// tags. See `docs/snapshot-oci-format.md` for the full on-disk + /// format and atomicity guarantees. + /// + /// A pre-existing `oci-layout` file must declare a supported + /// `imageLayoutVersion`. Otherwise the call errors without + /// touching the directory. 
+ /// + /// # Portability + /// + /// Snapshot images are bound to a specific CPU architecture and + /// hypervisor. Both are recorded in the config blob and checked + /// at load time, with mismatches rejected with a clear error. + /// The hypervisor tag (kvm/mshv/whp) constrains the host OS. + pub fn to_oci(&self, path: impl AsRef, tag: &str) -> crate::Result<()> { + let path = path.as_ref(); + validate_tag(tag)?; + + // The parent directory must already exist. `path` itself is + // created if absent. An existing regular file at `path` is + // rejected by the underlying `create_dir`. + match path.parent() { + Some(p) if !p.as_os_str().is_empty() => { + let parent_meta = std::fs::metadata(p).map_err(|e| { + crate::new_error!("to_oci: parent directory {:?} not accessible: {}", p, e) + })?; + if !parent_meta.is_dir() { + return Err(crate::new_error!( + "to_oci: parent of {:?} is not a directory", + path + )); + } + } + _ => {} + } + match std::fs::create_dir(path) { + Ok(()) => {} + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { + let meta = std::fs::metadata(path) + .map_err(|e| crate::new_error!("to_oci: failed to stat {:?}: {}", path, e))?; + if !meta.is_dir() { + return Err(crate::new_error!( + "to_oci: {:?} exists and is not a directory", + path + )); + } + } + Err(e) => { + return Err(crate::new_error!( + "to_oci: failed to create layout dir {:?}: {}", + path, + e + )); + } + } + + // Validate any pre-existing `oci-layout` marker before + // touching anything else, so a foreign layout (future + // version, hand-edited file) is reported without altering + // the directory. 
+ let layout_marker = path.join("oci-layout"); + let marker_existed = layout_marker + .try_exists() + .map_err(|e| crate::new_error!("to_oci: failed to stat {:?}: {}", layout_marker, e))?; + if marker_existed { + let bytes = read_bounded(&layout_marker, MAX_CONFIG_BLOB_SIZE).map_err(|e| { + crate::new_error!("to_oci: failed to read existing oci-layout: {}", e) + })?; + let v: serde_json::Value = serde_json::from_slice(&bytes).map_err(|e| { + crate::new_error!("to_oci: existing oci-layout is not valid JSON: {}", e) + })?; + match v.get("imageLayoutVersion").and_then(|s| s.as_str()) { + Some(s) if s == OCI_LAYOUT_VERSION => {} + Some(other) => { + return Err(crate::new_error!( + "to_oci: existing imageLayoutVersion {:?} is unsupported (expected {:?})", + other, + OCI_LAYOUT_VERSION + )); + } + None => { + return Err(crate::new_error!( + "to_oci: existing oci-layout is missing imageLayoutVersion" + )); + } + } + } + + let index_path = path.join("index.json"); + let index_existed = index_path + .try_exists() + .map_err(|e| crate::new_error!("to_oci: failed to stat {:?}: {}", index_path, e))?; + let mut manifests: Vec = if index_existed { + let bytes = read_bounded(&index_path, MAX_CONFIG_BLOB_SIZE).map_err(|e| { + crate::new_error!("to_oci: failed to read existing index.json: {}", e) + })?; + let existing: ImageIndex = serde_json::from_slice(&bytes).map_err(|e| { + crate::new_error!( + "to_oci: existing index.json is not a valid OCI image index: {}", + e + ) + })?; + existing.manifests().to_vec() + } else { + Vec::new() + }; + + let new_desc = self.write_blobs_and_build_descriptor(path, tag)?; + + // Replacement is by tag, not by digest: a new snapshot may + // hash to a different value but still claim the same logical + // ref. Blobs from the replaced manifest become orphans. 
+ manifests.retain(|d| { + d.annotations() + .as_ref() + .and_then(|a| a.get(ANNOTATION_REF_NAME)) + .map(|s| s.as_str() != tag) + .unwrap_or(true) + }); + manifests.push(new_desc); + + let index = ImageIndexBuilder::default() + .schema_version(SCHEMA_VERSION) + .media_type(MediaType::ImageIndex) + .manifests(manifests) + .build() + .map_err(|e| crate::new_error!("failed to build OCI index: {}", e))?; + let index_bytes = serde_json::to_vec_pretty(&index) + .map_err(|e| crate::new_error!("failed to serialise OCI index: {}", e))?; + + // Write the marker before the index swap. A loader that sees + // the new index requires the marker; ordering them this way + // keeps the layout valid at every step. + if !marker_existed { + let layout_bytes = serde_json::to_vec(&serde_json::json!({ + "imageLayoutVersion": OCI_LAYOUT_VERSION, + })) + .map_err(|e| crate::new_error!("failed to serialise oci-layout: {}", e))?; + write_file_atomic(&layout_marker, &layout_bytes)?; + } + + // Index swap is the commit point. + write_file_atomic(&index_path, &index_bytes)?; + + Ok(()) + } + + fn write_blobs_and_build_descriptor(&self, dir: &Path, tag: &str) -> crate::Result { + let blobs_dir = dir.join("blobs").join("sha256"); + std::fs::create_dir_all(&blobs_dir).map_err(|e| { + crate::new_error!("failed to create OCI blobs dir {:?}: {}", blobs_dir, e) + })?; + + // Snapshot blob: the raw memory bytes. + let memory_bytes = self.memory.as_slice(); + let memory_size = memory_bytes.len(); + if memory_size == 0 || memory_size % PAGE_SIZE != 0 { + return Err(crate::new_error!( + "snapshot memory size {} must be a non-zero multiple of PAGE_SIZE", + memory_size + )); + } + let snapshot_digest = Digest256::from_bytes(memory_bytes); + write_blob_if_absent(&blobs_dir, &snapshot_digest, memory_bytes)?; + + // Config blob. 
+ let cfg = self.build_config()?; + let cfg_bytes = serde_json::to_vec_pretty(&cfg) + .map_err(|e| crate::new_error!("failed to serialise config JSON: {}", e))?; + let cfg_digest = Digest256::from_bytes(&cfg_bytes); + write_blob_if_absent(&blobs_dir, &cfg_digest, &cfg_bytes)?; + + // Manifest blob. + let config_descriptor = DescriptorBuilder::default() + .media_type(MediaType::Other(MT_CONFIG_CURRENT.to_string())) + .digest(oci_digest(&cfg_digest)?) + .size(cfg_bytes.len() as u64) + .build() + .map_err(|e| crate::new_error!("failed to build config descriptor: {}", e))?; + let snapshot_descriptor = DescriptorBuilder::default() + .media_type(MediaType::Other(MT_SNAPSHOT_CURRENT.to_string())) + .digest(oci_digest(&snapshot_digest)?) + .size(memory_size as u64) + .build() + .map_err(|e| crate::new_error!("failed to build snapshot descriptor: {}", e))?; + // `artifactType` is set equal to `config.mediaType` per OCI + // image-spec "Guidelines for Artifact Usage". Registries + // surface this on the distribution-spec referrers API. Tools + // that read only `config.mediaType` see the same value. + let manifest = ImageManifestBuilder::default() + .schema_version(SCHEMA_VERSION) + .media_type(MediaType::ImageManifest) + .artifact_type(MediaType::Other(MT_CONFIG_CURRENT.to_string())) + .config(config_descriptor) + .layers(vec![snapshot_descriptor]) + .build() + .map_err(|e| crate::new_error!("failed to build OCI manifest: {}", e))?; + let manifest_bytes = serde_json::to_vec_pretty(&manifest) + .map_err(|e| crate::new_error!("failed to serialise OCI manifest: {}", e))?; + let manifest_digest = Digest256::from_bytes(&manifest_bytes); + write_blob_if_absent(&blobs_dir, &manifest_digest, &manifest_bytes)?; + + let mut anns = std::collections::HashMap::new(); + anns.insert(ANNOTATION_REF_NAME.to_string(), tag.to_string()); + DescriptorBuilder::default() + .media_type(MediaType::ImageManifest) + .digest(oci_digest(&manifest_digest)?) 
+ .size(manifest_bytes.len() as u64) + .annotations(anns) + .build() + .map_err(|e| crate::new_error!("failed to build manifest descriptor: {}", e)) + } + + fn build_config(&self) -> crate::Result { + let entrypoint = match (self.entrypoint, self.sregs.as_ref()) { + (NextAction::Initialise(addr), None) => Entrypoint::Initialise { addr }, + (NextAction::Call(addr), Some(sregs)) => Entrypoint::Call { + addr, + sregs: Box::new(Sregs::from(sregs)), + }, + (NextAction::Initialise(_), Some(_)) => { + return Err(crate::new_error!( + "snapshot inconsistent: Initialise entrypoint must not have sregs" + )); + } + (NextAction::Call(_), None) => { + return Err(crate::new_error!( + "snapshot inconsistent: Call entrypoint must have sregs" + )); + } + #[cfg(test)] + (NextAction::None, _) => { + return Err(crate::new_error!( + "snapshot with NextAction::None cannot be persisted" + )); + } + }; + + let host_functions = match &self.host_functions.host_functions { + Some(v) => v.iter().map(HostFunction::from).collect(), + None => Vec::new(), + }; + + let l = &self.layout; + Ok(OciSnapshotConfig { + hyperlight_version: env!("CARGO_PKG_VERSION").to_string(), + arch: Arch::current(), + abi_version: SNAPSHOT_ABI_VERSION, + hypervisor: Hypervisor::current() + .ok_or_else(|| crate::new_error!("no hypervisor available to tag snapshot"))?, + stack_top_gva: self.stack_top_gva, + entrypoint, + layout: MemoryLayout { + input_data_size: l.input_data_size, + output_data_size: l.output_data_size, + heap_size: l.heap_size, + code_size: l.code_size, + init_data_size: l.init_data_size, + init_data_permissions: l.init_data_permissions.map(|f| f.bits()), + scratch_size: l.get_scratch_size(), + snapshot_size: l.snapshot_size, + pt_size: l.pt_size, + }, + memory_size: self.memory.mem_size() as u64, + host_functions, + snapshot_generation: self.snapshot_generation, + }) + } + + /// Load the snapshot tagged `tag` from an OCI Image Layout + /// directory at `path`. 
+ /// + /// `tag` selects one manifest from `index.json` using + /// `org.opencontainers.image.ref.name`. Missing tags and duplicate + /// tags are rejected. + /// + /// This verifies sha256 for manifest, config, and snapshot blobs. + /// Use [`Snapshot::from_oci_unchecked`] to skip digest verification + /// in trusted paths. + /// + /// Returns an error for arch, hypervisor, and ABI mismatches. + /// + /// # File-mutation hazard + /// + /// Do not modify or replace files in `path` while the returned + /// `Snapshot` (or sandboxes built from it) is still alive. + pub fn from_oci(path: impl AsRef, tag: &str) -> crate::Result { + Self::from_oci_inner(path.as_ref(), tag, true) + } + + /// Like [`Snapshot::from_oci`] but **skips sha256 verification of + /// the manifest, config, and snapshot blob bytes**, trading + /// integrity checking for performance. All other validation + /// (OCI structure, descriptor sizes, schema versions, arch / + /// hypervisor / ABI tags, layout bounds, entrypoint bounds) is + /// unchanged. + pub fn from_oci_unchecked(path: impl AsRef, tag: &str) -> crate::Result { + Self::from_oci_inner(path.as_ref(), tag, false) + } + + fn from_oci_inner(path: &Path, tag: &str, verify_blobs: bool) -> crate::Result { + validate_tag(tag)?; + let meta = std::fs::metadata(path) + .map_err(|e| crate::new_error!("from_oci failed to stat {:?}: {}", path, e))?; + if !meta.is_dir() { + return Err(crate::new_error!( + "from_oci path {:?} is not a directory", + path + )); + } + + // 1. 
oci-layout + let layout_bytes = read_bounded(&path.join("oci-layout"), MAX_CONFIG_BLOB_SIZE) + .map_err(|e| crate::new_error!("failed to read oci-layout: {}", e))?; + let layout_json: serde_json::Value = serde_json::from_slice(&layout_bytes) + .map_err(|e| crate::new_error!("oci-layout is not valid JSON: {}", e))?; + let v = layout_json + .get("imageLayoutVersion") + .and_then(|v| v.as_str()) + .ok_or_else(|| crate::new_error!("oci-layout missing imageLayoutVersion field"))?; + if v != OCI_LAYOUT_VERSION { + return Err(crate::new_error!( + "unsupported OCI image layout version {:?} (expected {:?})", + v, + OCI_LAYOUT_VERSION + )); + } + + // 2. index.json -> manifest descriptor for `tag`. Multiple + // manifests are fine in OCI Image Layout; we select the + // one whose `org.opencontainers.image.ref.name` annotation + // matches the requested tag. Two manifests with the same + // tag is a malformed layout. + let index_bytes = read_bounded(&path.join("index.json"), MAX_CONFIG_BLOB_SIZE) + .map_err(|e| crate::new_error!("failed to read index.json: {}", e))?; + let index: ImageIndex = serde_json::from_slice(&index_bytes) + .map_err(|e| crate::new_error!("failed to parse index.json: {}", e))?; + let mut matching = index.manifests().iter().filter(|d| { + d.annotations() + .as_ref() + .and_then(|a| a.get(ANNOTATION_REF_NAME)) + .map(|s| s.as_str() == tag) + .unwrap_or(false) + }); + let manifest_desc = match (matching.next(), matching.next()) { + (None, _) => { + let known: Vec<&str> = index + .manifests() + .iter() + .filter_map(|d| { + d.annotations() + .as_ref() + .and_then(|a| a.get(ANNOTATION_REF_NAME)) + .map(|s| s.as_str()) + }) + .collect(); + return Err(crate::new_error!( + "no manifest tagged {:?} in OCI layout {:?}. 
Available tags: {:?}", + tag, + path, + known + )); + } + (Some(_), Some(_)) => { + return Err(crate::new_error!( + "OCI layout {:?} has multiple manifests tagged {:?}; tags must be unique", + path, + tag + )); + } + (Some(d), None) => d, + }; + // The manifest descriptor must advertise an OCI image + // manifest. Refuse anything else up front so we never try to + // parse, say, an image index or an arbitrary artifact blob + // as an `ImageManifest`. + if !matches!(manifest_desc.media_type(), MediaType::ImageManifest) { + return Err(crate::new_error!( + "manifest descriptor for tag {:?} has unexpected media type {:?} (expected {:?})", + tag, + manifest_desc.media_type().to_string(), + MediaType::ImageManifest.to_string() + )); + } + let manifest_hex = parse_oci_digest(manifest_desc.digest().as_ref())?; + + // 3. manifest blob + let manifest_path = path.join("blobs").join("sha256").join(&manifest_hex); + let manifest_bytes = read_bounded(&manifest_path, MAX_CONFIG_BLOB_SIZE)?; + if manifest_bytes.len() as u64 != manifest_desc.size() { + return Err(crate::new_error!( + "OCI manifest size mismatch: descriptor says {}, file is {}", + manifest_desc.size(), + manifest_bytes.len() + )); + } + if verify_blobs { + verify_blob_bytes("manifest", &manifest_bytes, &manifest_hex)?; + } + let manifest: ImageManifest = serde_json::from_slice(&manifest_bytes) + .map_err(|e| crate::new_error!("failed to parse OCI manifest JSON: {}", e))?; + if manifest.schema_version() != SCHEMA_VERSION { + return Err(crate::new_error!( + "unsupported OCI manifest schemaVersion {} (expected {})", + manifest.schema_version(), + SCHEMA_VERSION + )); + } + let cfg_desc = manifest.config(); + // Loader dispatch on config media type. A future v2 lands + // as a new arm that converts to the in-memory current shape. 
+ let cfg_media = cfg_desc.media_type().to_string(); + match cfg_media.as_str() { + MT_CONFIG_V1 => {} + other => { + return Err(crate::new_error!( + "unexpected config media type {:?} (supported: {:?})", + other, + MT_CONFIG_V1 + )); + } + } + // `artifactType` mirrors `config.mediaType` (manifest.md + // "Guidelines for Artifact Usage"). The OCI spec leaves this + // field OPTIONAL. A Hyperlight snapshot requires it to be + // present and equal to `config.mediaType` so loaders can + // distinguish a Hyperlight artifact from an arbitrary + // manifest that happens to share blob layout. + match manifest.artifact_type() { + Some(at) if at.to_string() == cfg_media => {} + Some(at) => { + return Err(crate::new_error!( + "OCI manifest artifactType {:?} does not match config media type {:?}", + at.to_string(), + cfg_media + )); + } + None => { + return Err(crate::new_error!( + "OCI manifest is missing required artifactType (expected {:?})", + cfg_media + )); + } + } + let layers = manifest.layers(); + if layers.len() != 1 { + return Err(crate::new_error!( + "expected exactly one OCI layer (the snapshot), found {}", + layers.len() + )); + } + let snap_desc = &layers[0]; + let snap_media = snap_desc.media_type().to_string(); + match snap_media.as_str() { + MT_SNAPSHOT_V1 => {} + other => { + return Err(crate::new_error!( + "unexpected snapshot layer media type {:?} (supported: {:?})", + other, + MT_SNAPSHOT_V1 + )); + } + } + + // 4. 
config blob + let cfg_hex = parse_oci_digest(cfg_desc.digest().as_ref())?; + let cfg_path = path.join("blobs").join("sha256").join(&cfg_hex); + let cfg_bytes = read_bounded(&cfg_path, MAX_CONFIG_BLOB_SIZE)?; + if cfg_bytes.len() as u64 != cfg_desc.size() { + return Err(crate::new_error!( + "config blob size mismatch: descriptor says {}, file is {}", + cfg_desc.size(), + cfg_bytes.len() + )); + } + if verify_blobs { + verify_blob_bytes("config", &cfg_bytes, &cfg_hex)?; + } + let cfg: OciSnapshotConfig = serde_json::from_slice(&cfg_bytes) + .map_err(|e| crate::new_error!("failed to parse Hyperlight config JSON: {}", e))?; + cfg.validate_for_load()?; + + // 5. snapshot blob: open once, hash and mmap the same + // handle so an attacker cannot swap the file between + // verification and mapping. + let snap_hex = parse_oci_digest(snap_desc.digest().as_ref())?; + let snap_path = path.join("blobs").join("sha256").join(&snap_hex); + let mut snap_file = std::fs::File::open(&snap_path).map_err(|e| { + crate::new_error!("failed to open snapshot blob {:?}: {}", snap_path, e) + })?; + let snap_file_len = snap_file + .metadata() + .map_err(|e| crate::new_error!("failed to stat snapshot blob: {}", e))? + .len(); + let expected_blob_len = cfg.memory_size; + if snap_file_len != expected_blob_len { + return Err(crate::new_error!( + "snapshot blob size mismatch: file is {} bytes, expected {} \ + (memory_size)", + snap_file_len, + expected_blob_len, + )); + } + if snap_file_len != snap_desc.size() { + return Err(crate::new_error!( + "snapshot blob size {} disagrees with OCI descriptor size {}", + snap_file_len, + snap_desc.size() + )); + } + if verify_blobs { + verify_blob_file("snapshot", &mut snap_file, &snap_hex)?; + } + + // 6. Reconstruct layout. 
+ let mut sbox_cfg = crate::sandbox::SandboxConfiguration::default(); + sbox_cfg.set_input_data_size(cfg.layout.input_data_size); + sbox_cfg.set_output_data_size(cfg.layout.output_data_size); + sbox_cfg.set_heap_size(cfg.layout.heap_size as u64); + sbox_cfg.set_scratch_size(cfg.layout.scratch_size); + let init_data_perms = match cfg.layout.init_data_permissions { + None => None, + Some(bits) => Some(MemoryRegionFlags::from_bits(bits).ok_or_else(|| { + crate::new_error!( + "snapshot init_data_permissions {:#x} contains unknown flag bits", + bits + ) + })?), + }; + let mut layout = SandboxMemoryLayout::new( + sbox_cfg, + cfg.layout.code_size, + cfg.layout.init_data_size, + init_data_perms, + )?; + // `snapshot_size` and `pt_size` are independent fields. + if let Some(pt) = cfg.layout.pt_size { + layout.set_pt_size(pt)?; + } + layout.set_snapshot_size(cfg.layout.snapshot_size); + + // 7. mmap the snapshot blob (file-backed CoW). The blob is + // the raw memory image. `ReadonlySharedMemory::from_file` + // surrounds it with host guard pages. The guest mapping + // of the snapshot region covers only the data prefix + // (`snapshot_size`). The PT tail sits past that prefix + // in the host mapping and is copied into the scratch + // region on restore. Keeping it out of the guest mapping + // of the snapshot region avoids overlap with + // `map_file_cow` regions installed immediately after the + // snapshot in guest PA space. + let memory = ReadonlySharedMemory::from_file(&snap_file, layout.snapshot_size)?; + + // 8. Build entrypoint + sregs back from the tagged enum. + let (entrypoint, sregs) = match cfg.entrypoint { + Entrypoint::Initialise { addr } => (NextAction::Initialise(addr), None), + Entrypoint::Call { addr, sregs } => ( + NextAction::Call(addr), + Some(CommonSpecialRegisters::from(*sregs)), + ), + }; + + // 9. Reconstitute host_functions metadata. 
+ let snapshot_generation = cfg.snapshot_generation; + let host_funcs_vec: Vec< + hyperlight_common::flatbuffer_wrappers::host_function_definition::HostFunctionDefinition, + > = cfg.host_functions.into_iter().map(Into::into).collect(); + let host_functions = if host_funcs_vec.is_empty() { + HostFunctionDetails { + host_functions: None, + } + } else { + HostFunctionDetails { + host_functions: Some(host_funcs_vec), + } + }; + + Ok(Snapshot { + layout, + memory, + regions: Vec::new(), + load_info: crate::mem::exe::LoadInfo::dummy(), + stack_top_gva: cfg.stack_top_gva, + sregs, + entrypoint, + snapshot_generation, + host_functions, + }) + } +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index 91fad0d4c..77647cada 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -14,6 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. */ +mod file; +mod file_tests; + use std::collections::{BTreeMap, HashMap}; use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; From a8fb19b81eaa13abd2a7acbe76b52e78b5644a4a Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 16:01:13 -0700 Subject: [PATCH 2/3] Add tests for OCI snapshot persistence Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/sandbox/snapshot/file_tests.rs | 2660 +++++++++++++++++ 1 file changed, 2660 insertions(+) create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file_tests.rs diff --git a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs new file mode 100644 index 000000000..271988cf6 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs @@ -0,0 +1,2660 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Tests for the OCI Image Layout snapshot format (`super::file`). + +#![cfg(test)] + +use std::sync::Arc; + +use hyperlight_testing::simple_guest_as_string; +use serde_json::Value; +use sha2::{Digest as _, Sha256}; + +use crate::func::Registerable; +use crate::sandbox::snapshot::Snapshot; +use crate::{GuestBinary, HostFunctions, MultiUseSandbox, UninitializedSandbox}; + +fn create_test_sandbox() -> MultiUseSandbox { + let path = simple_guest_as_string().unwrap(); + UninitializedSandbox::new(GuestBinary::FilePath(path), None) + .unwrap() + .evolve() + .unwrap() +} + +fn create_snapshot_from_binary() -> Snapshot { + let path = simple_guest_as_string().unwrap(); + Snapshot::from_env( + GuestBinary::FilePath(path), + crate::sandbox::SandboxConfiguration::default(), + ) + .unwrap() +} + +/// `Result::unwrap_err` requires `T: Debug`, but `Snapshot` is not +/// `Debug`. This wrapper is the test-side equivalent. +#[track_caller] +fn unwrap_err_snapshot(r: crate::Result) -> crate::HyperlightError { + match r { + Err(e) => e, + Ok(_) => panic!("expected Snapshot::from_oci to fail"), + } +} + +/// Locate the single config blob inside `oci_dir`. Returns its full +/// path. Used by tests that mutate the on-disk JSON. 
+fn find_config_blob(oci_dir: &std::path::Path) -> std::path::PathBuf { + let manifest_bytes = std::fs::read(oci_dir.join("index.json")).unwrap(); + let index: Value = serde_json::from_slice(&manifest_bytes).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let manifest_path = oci_dir.join("blobs").join("sha256").join(manifest_digest); + let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap(); + let cfg_digest = manifest["config"]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + oci_dir.join("blobs").join("sha256").join(cfg_digest) +} + +// ============================================================================= +// In-memory `from_snapshot` round-trips (no file I/O). +// ============================================================================= + +#[test] +fn from_snapshot_already_initialized_in_memory() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None).unwrap(); + let result: i32 = sbox2.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +#[test] +fn from_snapshot_in_memory_pre_init() { + let snap = create_snapshot_from_binary(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(snap), HostFunctions::default(), None).unwrap(); + let result: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +// ============================================================================= +// Round-trip via OCI layout on disk. 
// =============================================================================

/// Save a running-sandbox snapshot to an OCI layout, load it back,
/// build a sandbox from it and check that a guest call still works.
#[test]
fn round_trip_save_load_call() {
    let mut sbox = create_test_sandbox();
    let snapshot = sbox.snapshot().unwrap();

    let dir = tempfile::tempdir().unwrap();
    let oci = dir.path().join("snap");
    snapshot.to_oci(&oci, "latest").unwrap();

    let loaded = Snapshot::from_oci(&oci, "latest").unwrap();
    let mut sbox2 =
        MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap();

    let result: String = sbox2.call("Echo", "hello\n".to_string()).unwrap();
    assert_eq!(result, "hello\n");
}

/// `snapshot_size` and `pt_size` of the layout survive a save/load
/// cycle, for both a running-sandbox snapshot and a pre-init one.
#[test]
fn snapshot_and_pt_size_round_trip() {
    // Running-sandbox snapshot.
    let mut sbox = create_test_sandbox();
    let snap = sbox.snapshot().unwrap();
    let original_snapshot_size = snap.layout().snapshot_size;
    let original_pt_size = snap.layout().pt_size;

    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("running");
    snap.to_oci(&path, "latest").unwrap();

    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
    assert_eq!(loaded.layout().snapshot_size, original_snapshot_size);
    assert_eq!(loaded.layout().pt_size, original_pt_size);

    // Pre-init snapshot.
    let preinit = create_snapshot_from_binary();
    let preinit_snapshot_size = preinit.layout().snapshot_size;
    let preinit_pt_size = preinit.layout().pt_size;

    let path = dir.path().join("preinit");
    preinit.to_oci(&path, "latest").unwrap();

    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
    assert_eq!(loaded.layout().snapshot_size, preinit_snapshot_size);
    assert_eq!(loaded.layout().pt_size, preinit_pt_size);
}

/// Two snapshots taken at different points carry different
/// generations, and each generation is preserved through
/// `to_oci` / `from_oci`.
#[test]
fn snapshot_generation_round_trip() {
    let mut sbox = create_test_sandbox();
    sbox.call::<String>("Echo", "a".to_string()).unwrap();
    let snap1 = sbox.snapshot().unwrap();
    sbox.call::<String>("Echo", "b".to_string()).unwrap();
    sbox.call::<String>("Echo", "c".to_string()).unwrap();
    let snap3 = sbox.snapshot().unwrap();
    let gen1 = snap1.snapshot_generation();
    let gen3 = snap3.snapshot_generation();
    assert_ne!(gen1, gen3);

    let dir = tempfile::tempdir().unwrap();
    let p1 = dir.path().join("s1");
    let p3 = dir.path().join("s3");
    snap1.to_oci(&p1, "latest").unwrap();
    snap3.to_oci(&p3, "latest").unwrap();

    let loaded1 = Snapshot::from_oci(&p1, "latest").unwrap();
    let loaded3 = Snapshot::from_oci(&p3, "latest").unwrap();
    assert_eq!(loaded1.snapshot_generation(), gen1);
    assert_eq!(loaded3.snapshot_generation(), gen3);
}

/// A pre-init snapshot written to disk and loaded back can seed a
/// sandbox in which `GetStatic` reads 0.
#[test]
fn pre_init_snapshot_save_load() {
    let snap = create_snapshot_from_binary();
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("preinit");
    snap.to_oci(&path, "latest").unwrap();

    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
    let mut sbox =
        MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap();
    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
}

// =============================================================================
// Restore semantics (id/generation gating).
+// ============================================================================= + +#[test] +fn restore_from_loaded_snapshot() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Arc::new(Snapshot::from_oci(&path, "latest").unwrap()); + let mut sbox2 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + sbox2.call::("AddToStatic", 5i32).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 5); + + sbox2.restore(loaded).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn restore_across_independent_oci_loads_succeeds() { + // Compatibility between a sandbox and a snapshot is structural + // (memory layout plus host-function set). Two independent + // `from_oci` loads of the same image produce structurally + // identical snapshots, so a sandbox built from one accepts a + // restore from the other. 
+ let mut sbox = create_test_sandbox(); + let snap1 = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let p1 = dir.path().join("snap1"); + snap1.to_oci(&p1, "latest").unwrap(); + let p2 = dir.path().join("snap2"); + snap1.to_oci(&p2, "latest").unwrap(); + + let loaded1 = Arc::new(Snapshot::from_oci(&p1, "latest").unwrap()); + let loaded2 = Arc::new(Snapshot::from_oci(&p2, "latest").unwrap()); + + let mut sbox = MultiUseSandbox::from_snapshot(loaded2, HostFunctions::default(), None).unwrap(); + sbox.restore(loaded1).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn many_sandboxes_share_single_arc_snapshot() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Arc::new(Snapshot::from_oci(&path, "latest").unwrap()); + let mut sandboxes = Vec::new(); + for _ in 0..4 { + sandboxes.push( + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(), + ); + } + for sbox in sandboxes.iter_mut() { + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + } +} + +#[test] +fn concurrent_sandboxes_from_same_oci() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let path = std::sync::Arc::new(path); + let mut handles = Vec::new(); + for _ in 0..4 { + let p = path.clone(); + handles.push(std::thread::spawn(move || { + let loaded = Snapshot::from_oci(p.as_ref(), "latest").unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + })); + } + for h in handles { + h.join().unwrap(); + } +} + +#[test] +fn cow_does_not_mutate_backing_file() { + let 
mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Hash every blob file to verify nothing changes after a CoW write + // through the loaded sandbox. + let blobs_dir = path.join("blobs").join("sha256"); + let snapshot_before: std::collections::BTreeMap<_, _> = std::fs::read_dir(&blobs_dir) + .unwrap() + .map(|e| { + let e = e.unwrap(); + let bytes = std::fs::read(e.path()).unwrap(); + (e.file_name(), bytes) + }) + .collect(); + + { + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .unwrap(); + sbox.call::("AddToStatic", 99).unwrap(); + } + + let snapshot_after: std::collections::BTreeMap<_, _> = std::fs::read_dir(&blobs_dir) + .unwrap() + .map(|e| { + let e = e.unwrap(); + let bytes = std::fs::read(e.path()).unwrap(); + (e.file_name(), bytes) + }) + .collect(); + assert_eq!( + snapshot_before, snapshot_after, + "CoW writes must not mutate any blob in the OCI layout" + ); +} + +// ============================================================================= +// Architecture / hypervisor / ABI gating. +// ============================================================================= + +/// Compute sha256 of `bytes` and return the lowercase hex digest. +fn sha256_hex(bytes: &[u8]) -> String { + let arr: [u8; 32] = Sha256::digest(bytes).into(); + hex::encode(arr) +} + +fn rewrite_config(oci_dir: &std::path::Path, mutate: F) { + // Mutate the config blob and rewrite the manifest + index so the + // OCI layout stays self-consistent: blob filenames, descriptor + // sizes, and descriptor sha256 digests all match the current + // bytes on disk. 
The point of these helpers is to exercise + // field-level validators (arch, abi_version, hypervisor, etc.), + // not the digest layer; tests that want to probe the digest + // layer write raw bytes directly. + let cfg_path = find_config_blob(oci_dir); + let mut cfg: Value = serde_json::from_slice(&std::fs::read(&cfg_path).unwrap()).unwrap(); + mutate(&mut cfg); + let new_cfg_bytes = serde_json::to_vec_pretty(&cfg).unwrap(); + let new_cfg_hex = sha256_hex(&new_cfg_bytes); + let blobs_dir = oci_dir.join("blobs").join("sha256"); + let new_cfg_path = blobs_dir.join(&new_cfg_hex); + std::fs::write(&new_cfg_path, &new_cfg_bytes).unwrap(); + if new_cfg_path != cfg_path { + std::fs::remove_file(&cfg_path).ok(); + } + + let mp = manifest_path(oci_dir); + let mut manifest: Value = serde_json::from_slice(&std::fs::read(&mp).unwrap()).unwrap(); + manifest["config"]["digest"] = Value::from(format!("sha256:{}", new_cfg_hex)); + manifest["config"]["size"] = Value::from(new_cfg_bytes.len() as u64); + let new_manifest_bytes = serde_json::to_vec_pretty(&manifest).unwrap(); + let new_manifest_hex = sha256_hex(&new_manifest_bytes); + let new_manifest_path = blobs_dir.join(&new_manifest_hex); + std::fs::write(&new_manifest_path, &new_manifest_bytes).unwrap(); + if new_manifest_path != mp { + std::fs::remove_file(&mp).ok(); + } + + let index_path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&index_path).unwrap()).unwrap(); + index["manifests"][0]["digest"] = Value::from(format!("sha256:{}", new_manifest_hex)); + index["manifests"][0]["size"] = Value::from(new_manifest_bytes.len() as u64); + std::fs::write(index_path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +/// Locate the manifest blob path inside `oci_dir`. 
+fn manifest_path(oci_dir: &std::path::Path) -> std::path::PathBuf { + let index: Value = + serde_json::from_slice(&std::fs::read(oci_dir.join("index.json")).unwrap()).unwrap(); + let digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + oci_dir.join("blobs").join("sha256").join(digest) +} + +/// Mutate the on-disk manifest JSON. Updates the index's manifest +/// descriptor `size` and `digest` to match the new manifest bytes +/// so the test exercises the field-level validator we care about, +/// not the digest layer. +fn rewrite_manifest(oci_dir: &std::path::Path, mutate: F) { + let mp = manifest_path(oci_dir); + let mut manifest: Value = serde_json::from_slice(&std::fs::read(&mp).unwrap()).unwrap(); + mutate(&mut manifest); + let new_bytes = serde_json::to_vec_pretty(&manifest).unwrap(); + let new_hex = sha256_hex(&new_bytes); + let blobs_dir = oci_dir.join("blobs").join("sha256"); + let new_path = blobs_dir.join(&new_hex); + std::fs::write(&new_path, &new_bytes).unwrap(); + if new_path != mp { + std::fs::remove_file(&mp).ok(); + } + + let index_path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&index_path).unwrap()).unwrap(); + index["manifests"][0]["digest"] = Value::from(format!("sha256:{}", new_hex)); + index["manifests"][0]["size"] = Value::from(new_bytes.len() as u64); + std::fs::write(index_path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +/// Mutate the on-disk index JSON in place. The index is the root of +/// the OCI layout and is not itself referenced by any digest, so +/// nothing further needs to be updated. 
+fn rewrite_index(oci_dir: &std::path::Path, mutate: F) { + let path = oci_dir.join("index.json"); + let mut index: Value = serde_json::from_slice(&std::fs::read(&path).unwrap()).unwrap(); + mutate(&mut index); + std::fs::write(path, serde_json::to_vec_pretty(&index).unwrap()).unwrap(); +} + +#[test] +fn arch_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_config(&path, |cfg| { + cfg["arch"] = Value::from("aarch64"); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("architecture") || msg.contains("arch"), + "expected architecture mismatch, got: {}", + msg + ); +} + +#[test] +fn abi_version_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_config(&path, |cfg| { + cfg["abi_version"] = Value::from(9999u32); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("ABI") || msg.contains("abi"), + "expected ABI version mismatch, got: {}", + msg + ); +} + +#[test] +fn hypervisor_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Pick a hypervisor that is not the current one. 
+ let current = cfg_current_hypervisor(); + let other = if current == "kvm" { "mshv" } else { "kvm" }; + + rewrite_config(&path, |cfg| { + cfg["hypervisor"] = Value::from(other); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("hypervisor"), + "expected hypervisor mismatch, got: {}", + msg + ); +} + +fn cfg_current_hypervisor() -> &'static str { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("probe"); + create_snapshot_from_binary() + .to_oci(&path, "latest") + .unwrap(); + let cfg_path = find_config_blob(&path); + let cfg: Value = serde_json::from_slice(&std::fs::read(&cfg_path).unwrap()).unwrap(); + match cfg["hypervisor"].as_str().unwrap() { + "kvm" => "kvm", + "mshv" => "mshv", + "whp" => "whp", + other => panic!("unknown hypervisor tag {other}"), + } +} + +// ============================================================================= +// Entrypoint vs sregs invariants enforced by serde shape. +// ============================================================================= + +#[test] +fn call_snapshot_without_sregs_rejected() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Strip sregs from the entrypoint variant. serde must reject the + // missing field at parse time. 
+ rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + assert_eq!(entry["kind"].as_str().unwrap(), "call"); + entry.remove("sregs"); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("sregs") || msg.contains("missing field") || msg.contains("config"), + "expected serde error about missing sregs, got: {}", + msg + ); +} + +#[test] +fn initialise_snapshot_with_sregs_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Add a bogus sregs field to the Initialise variant. serde must + // reject the unknown field (variant has deny_unknown_fields). + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + assert_eq!(entry["kind"].as_str().unwrap(), "initialise"); + entry.insert("sregs".to_string(), Value::from("{}")); + }); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("sregs") || msg.contains("unknown field") || msg.contains("config"), + "expected serde error about unknown field sregs, got: {}", + msg + ); +} + +// ============================================================================= +// Host functions validation. +// +// `validate_host_functions` enforces a superset relation: every host +// function registered when the snapshot was taken must be present in +// the loaded sandbox's `HostFunctions` with a matching signature. +// Extras in the loaded set are allowed. +// ============================================================================= + +/// Build a `MultiUseSandbox` with the default host functions plus a +/// custom `Add(i32, i32) -> i32`. Used to seed the snapshot side of +/// the host-function validation tests so the snapshot has a +/// non-default required function. 
+fn create_sandbox_with_custom_host_funcs() -> MultiUseSandbox { + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + u.evolve().unwrap() +} + +/// `HostFunctions::default()` plus a matching `Add(i32, i32) -> i32`. +fn host_funcs_with_matching_add() -> HostFunctions { + let mut hf = HostFunctions::default(); + hf.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + hf +} + +#[test] +fn from_snapshot_accepts_matching_host_functions() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), host_funcs_with_matching_add(), None) + .unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn from_snapshot_rejects_missing_host_function() { + // Snapshot was taken with `Add` registered. Loading with the + // default `HostFunctions` (no `Add`) must be rejected. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let err = MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None) + .expect_err("from_snapshot must reject a HostFunctions set missing `Add`"); + let msg = format!("{}", err); + assert!( + msg.contains("missing") && msg.contains("Add"), + "expected missing-host-function error mentioning Add, got: {}", + msg + ); +} + +#[test] +fn from_snapshot_rejects_signature_mismatch() { + // Snapshot has `Add(i32, i32) -> i32`. 
Load registers an `Add` + // with a different signature. validate_host_functions must + // refuse the mismatch. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = HostFunctions::default(); + hf.register_host_function("Add", |a: String, b: String| Ok(format!("{a}{b}"))) + .unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let err = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None) + .expect_err("from_snapshot must reject a signature mismatch on Add"); + let msg = format!("{}", err); + assert!( + msg.contains("signature mismatches") && msg.contains("Add"), + "expected signature-mismatch error mentioning Add, got: {}", + msg + ); +} + +#[test] +fn from_snapshot_accepts_extra_host_functions() { + // Snapshot has `Add`. Load registers `Add` (matching) plus an + // unrelated `Mul`. Extras are allowed. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = host_funcs_with_matching_add(); + hf.register_host_function("Mul", |a: i32, b: i32| Ok(a * b)) + .unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn from_snapshot_accepts_zero_arg_host_function() { + // A zero-arg host function must round-trip through OCI. 
+ let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Zero", || Ok(7i64)).unwrap(); + let mut sbox = u.evolve().unwrap(); + + let snap = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snap.to_oci(&path, "latest").unwrap(); + + let mut hf = HostFunctions::default(); + hf.register_host_function("Zero", || Ok(7i64)).unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let _sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), hf, None) + .expect("zero-arg host function must round-trip through OCI"); +} + +#[test] +fn from_snapshot_has_default_host_print() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + let _ = sbox2.call::("PrintTwoArgs", ("hi".to_string(), 42i32)); +} + +// ============================================================================= +// OCI-shape invariants. 
+// ============================================================================= + +#[test] +fn missing_oci_layout_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::remove_file(path.join("oci-layout")).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("oci-layout"), + "expected missing oci-layout error, got: {}", + msg + ); +} + +#[test] +fn wrong_image_layout_version_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::write( + path.join("oci-layout"), + r#"{"imageLayoutVersion":"99.0.0"}"#, + ) + .unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("image layout version") || msg.contains("imageLayoutVersion"), + "expected layout version error, got: {}", + msg + ); +} + +#[test] +fn missing_index_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + std::fs::remove_file(path.join("index.json")).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("index.json"), + "expected missing index.json error, got: {}", + msg + ); +} + +#[test] +fn snapshot_blob_size_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Truncate the snapshot blob by one byte. 
+ let blobs_dir = path.join("blobs").join("sha256"); + let manifest_bytes = std::fs::read(path.join("index.json")).unwrap(); + let index: Value = serde_json::from_slice(&manifest_bytes).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let manifest_path = blobs_dir.join(manifest_digest); + let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let snap_path = blobs_dir.join(snap_digest); + let bytes = std::fs::read(&snap_path).unwrap(); + std::fs::write(&snap_path, &bytes[..bytes.len() - 1]).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("size") || msg.contains("mismatch"), + "expected size mismatch error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_zero_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + cfg["layout"]["snapshot_size"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("snapshot_size"), + "expected snapshot_size error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_unaligned_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + let s = cfg["layout"]["snapshot_size"].as_u64().unwrap(); + cfg["layout"]["snapshot_size"] = Value::from(s + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); 
+ assert!( + msg.contains("PAGE_SIZE") || msg.contains("multiple"), + "expected page alignment error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_snapshot_size_must_match_memory_size() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + let page = hyperlight_common::vmem::PAGE_SIZE as u64; + rewrite_config(&path, |cfg| { + let m = cfg["memory_size"].as_u64().unwrap(); + cfg["layout"]["snapshot_size"] = Value::from(m + page); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("does not equal memory_size"), + "expected snapshot_size + pt_size != memory_size error, got: {}", + msg + ); +} + +#[test] +fn snapshot_layout_pt_size_unaligned_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + rewrite_config(&path, |cfg| { + if let Some(p) = cfg["layout"]["pt_size"].as_u64() { + cfg["layout"]["pt_size"] = Value::from(p + 1); + } else { + cfg["layout"]["pt_size"] = Value::from(1u64); + } + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("pt_size") || msg.contains("PAGE_SIZE") || msg.contains("multiple"), + "expected pt_size validation error, got: {}", + msg + ); +} + +#[test] +fn missing_snapshot_blob_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let blobs_dir = path.join("blobs").join("sha256"); + let manifest_bytes = std::fs::read(path.join("index.json")).unwrap(); + let index: Value = serde_json::from_slice(&manifest_bytes).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() 
+ .unwrap() + .strip_prefix("sha256:") + .unwrap(); + let manifest_path = blobs_dir.join(manifest_digest); + let manifest: Value = serde_json::from_slice(&std::fs::read(&manifest_path).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap(); + std::fs::remove_file(blobs_dir.join(snap_digest)).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("snapshot blob") || msg.contains("No such") || msg.contains("not found"), + "expected missing-blob error, got: {}", + msg + ); +} + +// ============================================================================= +// Path semantics. +// ============================================================================= + +#[test] +fn from_oci_nonexistent_path_returns_error() { + let err = unwrap_err_snapshot(Snapshot::from_oci("/nonexistent/path/to/oci", "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("stat") || msg.contains("No such") || msg.contains("not found"), + "expected missing-path error, got: {}", + msg + ); +} + +#[test] +fn from_oci_file_not_directory_rejected() { + let dir = tempfile::tempdir().unwrap(); + let file_path = dir.path().join("not-a-dir"); + std::fs::write(&file_path, b"hello").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&file_path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("not a directory"), + "expected not-a-directory error, got: {}", + msg + ); +} + +#[test] +fn to_oci_appends_into_existing_layout_with_new_tag() { + // Two snapshots written to the same directory under different + // tags coexist. Both load back independently. The shared + // `oci-layout` marker and `blobs/sha256/` are reused. 
+ let snap_a = create_snapshot_from_binary(); + let snap_b = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + + snap_a.to_oci(&path, "a").unwrap(); + snap_b.to_oci(&path, "b").unwrap(); + + let _ = Snapshot::from_oci(&path, "a").unwrap(); + let _ = Snapshot::from_oci(&path, "b").unwrap(); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let tags: Vec<&str> = index["manifests"] + .as_array() + .unwrap() + .iter() + .map(|m| { + m["annotations"]["org.opencontainers.image.ref.name"] + .as_str() + .unwrap() + }) + .collect(); + assert_eq!(tags.len(), 2); + assert!(tags.contains(&"a")); + assert!(tags.contains(&"b")); +} + +#[test] +fn to_oci_replaces_descriptor_for_same_tag() { + // Writing the same tag twice replaces the manifest descriptor + // for that tag. The loader sees the second snapshot, not the + // first. The index ends up with exactly one entry for the tag. 
+ let mut sbox = create_test_sandbox(); + sbox.call::("Echo", "first".to_string()).unwrap(); + let snap_first = sbox.snapshot().unwrap(); + sbox.call::("Echo", "second".to_string()).unwrap(); + let snap_second = sbox.snapshot().unwrap(); + let gen_second = snap_second.snapshot_generation(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + + snap_first.to_oci(&path, "latest").unwrap(); + snap_second.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + assert_eq!(loaded.snapshot_generation(), gen_second); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let entries: Vec<&Value> = index["manifests"] + .as_array() + .unwrap() + .iter() + .filter(|m| { + m["annotations"]["org.opencontainers.image.ref.name"].as_str() == Some("latest") + }) + .collect(); + assert_eq!(entries.len(), 1, "expected one descriptor for tag 'latest'"); +} + +#[test] +fn to_oci_requires_parent_dir_to_exist() { + // The leaf directory at `path` is created, but the parent + // chain must already exist. A missing ancestor errors and the + // filesystem is left untouched. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let missing_parent = dir.path().join("a").join("b").join("c"); + let path = missing_parent.join("store"); + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("parent directory") || msg.contains("not accessible"), + "expected missing-parent error, got: {msg}" + ); + assert!(!missing_parent.exists(), "no parent dirs should be created"); +} + +#[test] +fn to_oci_creates_leaf_directory() { + // The leaf at `path` is created when missing, as long as the + // parent exists. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + snap.to_oci(&path, "latest").unwrap(); + let _ = Snapshot::from_oci(&path, "latest").unwrap(); +} + +#[test] +fn to_oci_rejects_regular_file_at_path() { + // A regular file at `path` cannot be turned into a directory. + // The call errors and the file is left intact. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("not-a-dir"); + std::fs::write(&path, b"i am a file").unwrap(); + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("is not a directory") || msg.contains("layout dir"), + "expected non-directory error, got: {msg}" + ); + assert_eq!(std::fs::read(&path).unwrap(), b"i am a file"); +} + +#[test] +fn to_oci_rejects_unsupported_existing_layout_version() { + // A pre-existing `oci-layout` with an unknown version is left + // alone and the call errors. Defends against silently rewriting + // a future layout we do not understand. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + std::fs::write( + path.join("oci-layout"), + br#"{"imageLayoutVersion":"99.0.0"}"#, + ) + .unwrap(); + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("imageLayoutVersion") || msg.contains("unsupported"), + "expected unsupported-version error, got: {msg}" + ); + assert!( + !path.join("index.json").exists(), + "to_oci must not have written index.json" + ); +} + +#[test] +fn to_oci_invalid_tag_does_not_touch_filesystem() { + // Tag grammar is checked before any filesystem mutation. An + // empty tag is rejected without creating the layout directory. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + let _ = snap.to_oci(&path, "").unwrap_err(); + assert!(!path.exists(), "target path must not be created on error"); + let leftovers: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(|e| e.ok()) + .map(|e| e.file_name()) + .collect(); + assert!( + leftovers.is_empty(), + "unexpected leftover entries in parent: {:?}", + leftovers + ); +} + +#[test] +fn to_oci_into_empty_existing_directory() { + // An empty pre-existing directory is treated as a fresh layout + // location. The marker, index, and blobs are all written. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + + snap.to_oci(&path, "latest").unwrap(); + let _ = Snapshot::from_oci(&path, "latest").unwrap(); + assert!(path.join("oci-layout").exists()); + assert!(path.join("index.json").exists()); +} + +#[test] +fn to_oci_preserves_unrelated_files_in_layout_dir() { + // Files inside the layout dir that are not part of the OCI + // structure are left alone. Mirrors the behaviour of every + // merging tool surveyed (containers/image, crane, regclient). + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + std::fs::write(path.join("README.md"), b"keep me").unwrap(); + + snap.to_oci(&path, "latest").unwrap(); + assert_eq!(std::fs::read(path.join("README.md")).unwrap(), b"keep me"); +} + +#[test] +fn to_oci_same_tag_same_content_is_idempotent() { + // Saving the same snapshot under the same tag twice ends up + // with one descriptor for the tag and the same blob count as + // after the first save. Blobs are content-addressed so the + // second write reuses them. 
+ let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + + snap.to_oci(&path, "latest").unwrap(); + let blobs_after_first: Vec<_> = std::fs::read_dir(path.join("blobs").join("sha256")) + .unwrap() + .filter_map(|e| e.ok().map(|e| e.file_name())) + .collect(); + + snap.to_oci(&path, "latest").unwrap(); + let blobs_after_second: Vec<_> = std::fs::read_dir(path.join("blobs").join("sha256")) + .unwrap() + .filter_map(|e| e.ok().map(|e| e.file_name())) + .collect(); + assert_eq!(blobs_after_first.len(), blobs_after_second.len()); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let manifests = index["manifests"].as_array().unwrap(); + assert_eq!(manifests.len(), 1); + assert_eq!( + manifests[0]["annotations"]["org.opencontainers.image.ref.name"], + "latest" + ); +} + +#[test] +fn to_oci_shares_blobs_across_tags_with_identical_content() { + // Two tags written from the same in-memory snapshot share all + // three blobs (manifest, config, snapshot). The blob directory + // therefore holds exactly three files even with two tags. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + + snap.to_oci(&path, "a").unwrap(); + snap.to_oci(&path, "b").unwrap(); + + let blobs: Vec<_> = std::fs::read_dir(path.join("blobs").join("sha256")) + .unwrap() + .filter_map(|e| e.ok().map(|e| e.file_name())) + .collect(); + assert_eq!(blobs.len(), 3, "expected 3 deduped blobs, got {:?}", blobs); +} + +#[test] +fn to_oci_replace_in_middle_preserves_other_tags() { + // Replacing one tag in a layout with three tags keeps the + // other two descriptors intact and produces a fresh descriptor + // for the replaced tag. 
+ let mut sbox = create_test_sandbox(); + let snap_a = sbox.snapshot().unwrap(); + sbox.call::("Echo", "x".to_string()).unwrap(); + let snap_b = sbox.snapshot().unwrap(); + sbox.call::("Echo", "y".to_string()).unwrap(); + let snap_c = sbox.snapshot().unwrap(); + sbox.call::("Echo", "z".to_string()).unwrap(); + let snap_b2 = sbox.snapshot().unwrap(); + let gen_b2 = snap_b2.snapshot_generation(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + snap_a.to_oci(&path, "a").unwrap(); + snap_b.to_oci(&path, "b").unwrap(); + snap_c.to_oci(&path, "c").unwrap(); + snap_b2.to_oci(&path, "b").unwrap(); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let tags: Vec<&str> = index["manifests"] + .as_array() + .unwrap() + .iter() + .map(|m| { + m["annotations"]["org.opencontainers.image.ref.name"] + .as_str() + .unwrap() + }) + .collect(); + assert_eq!(tags.len(), 3); + assert!(tags.contains(&"a")); + assert!(tags.contains(&"b")); + assert!(tags.contains(&"c")); + + let loaded_b = Snapshot::from_oci(&path, "b").unwrap(); + assert_eq!(loaded_b.snapshot_generation(), gen_b2); +} + +#[test] +fn to_oci_rejects_malformed_existing_oci_layout_json() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + std::fs::write(path.join("oci-layout"), b"not json").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("oci-layout") && msg.contains("JSON"), + "expected oci-layout JSON error, got: {msg}" + ); + assert!(!path.join("index.json").exists()); +} + +#[test] +fn to_oci_rejects_existing_oci_layout_missing_version() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + 
std::fs::write(path.join("oci-layout"), br#"{"other":"field"}"#).unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("imageLayoutVersion"), + "expected missing-version error, got: {msg}" + ); + assert!(!path.join("index.json").exists()); +} + +#[test] +fn to_oci_rejects_malformed_existing_index_json() { + // An existing `oci-layout` with a supported version plus a + // corrupt `index.json` is rejected. We do not silently discard + // someone else's index. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store"); + std::fs::create_dir_all(&path).unwrap(); + std::fs::write( + path.join("oci-layout"), + br#"{"imageLayoutVersion":"1.0.0"}"#, + ) + .unwrap(); + std::fs::write(path.join("index.json"), b"{not valid json").unwrap(); + + let err = snap.to_oci(&path, "latest").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("index.json"), + "expected index.json error, got: {msg}" + ); + assert_eq!( + std::fs::read(path.join("index.json")).unwrap(), + b"{not valid json", + "to_oci must not overwrite a malformed existing index.json" + ); +} + +/// Asserts the integrity contract: a snapshot blob whose bytes have +/// been replaced (without changing length, so descriptor sizes still +/// match) must be rejected on load via digest mismatch. +#[test] +fn from_oci_rejects_snapshot_blob_byte_mutation() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + // Locate the snapshot blob via the manifest, then flip one byte + // somewhere in the middle. Length is preserved so all descriptor + // size checks still pass. Only a digest re-hash can detect this. 
+ let blobs_dir = path.join("blobs").join("sha256"); + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let manifest_digest = index["manifests"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + let manifest: Value = + serde_json::from_slice(&std::fs::read(blobs_dir.join(&manifest_digest)).unwrap()).unwrap(); + let snap_digest = manifest["layers"][0]["digest"] + .as_str() + .unwrap() + .strip_prefix("sha256:") + .unwrap() + .to_string(); + let snap_path = blobs_dir.join(&snap_digest); + let mut bytes = std::fs::read(&snap_path).unwrap(); + let mid = bytes.len() / 2; + bytes[mid] ^= 0xFF; + std::fs::write(&snap_path, &bytes).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("hash") || msg.contains("sha256"), + "expected digest-mismatch error, got: {}", + msg + ); +} + +/// Same idea as `from_oci_rejects_snapshot_blob_byte_mutation`, but +/// targeting the config blob. A config-blob mutation that preserves +/// the descriptor size and the structural fields the loader +/// validates today (e.g. flipping a byte inside the host-function +/// flatbuffer payload) must be caught by digest verification. +#[test] +fn from_oci_rejects_config_blob_byte_mutation() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let cfg_path = find_config_blob(&path); + let mut bytes = std::fs::read(&cfg_path).unwrap(); + // Replace the first ASCII brace `{` with a different byte that + // keeps the file the same length but yields a different sha256. + // This will also break JSON parsing, but the point is to assert + // the digest layer rejects it before the parser ever runs. 
+ bytes[0] = b' '; + std::fs::write(&cfg_path, &bytes).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("hash") || msg.contains("sha256"), + "expected digest-mismatch error, got: {}", + msg + ); +} + +#[test] +fn from_oci_observes_per_path_contents() { + // Each save must be observable on its own: the test writes each + // snapshot to a separate layout path (rather than re-tagging a + // shared one) and asserts that every load returns the static + // value captured at its own save point (11, then 55). + let mut sbox = create_test_sandbox(); + sbox.call::("AddToStatic", 11i32).unwrap(); + let snap_x = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let path_x = dir.path().join("snap_x"); + snap_x.to_oci(&path_x, "latest").unwrap(); + + let loaded_x = Snapshot::from_oci(&path_x, "latest").unwrap(); + let mut sbox_x = + MultiUseSandbox::from_snapshot(Arc::new(loaded_x), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_x.call::("GetStatic", ()).unwrap(), 11); + + sbox.call::("AddToStatic", 44i32).unwrap(); + let snap_y = sbox.snapshot().unwrap(); + let path_y = dir.path().join("snap_y"); + snap_y.to_oci(&path_y, "latest").unwrap(); + + let loaded_y = Snapshot::from_oci(&path_y, "latest").unwrap(); + let mut sbox_y = + MultiUseSandbox::from_snapshot(Arc::new(loaded_y), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_y.call::("GetStatic", ()).unwrap(), 55); +} + +// ============================================================================= +// Exhaustive input-validation tests for `from_oci`. +// +// Every load-side error path in `super::file::from_oci` should be +// exercised here.
+// ============================================================================= + +fn save_for_mutation() -> (tempfile::TempDir, std::path::PathBuf) { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + (dir, path) +} + +fn assert_err_contains(err: crate::HyperlightError, needle: &str) { + let msg = format!("{}", err); + assert!( + msg.contains(needle), + "expected error to contain {:?}, got: {}", + needle, + msg + ); +} + +#[test] +fn malformed_oci_layout_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("oci-layout"), b"not-valid-json{").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "oci-layout"); +} + +#[test] +fn oci_layout_missing_version_field_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("oci-layout"), r#"{"unrelated":"field"}"#).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "imageLayoutVersion"); +} + +#[test] +fn malformed_index_json_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::write(path.join("index.json"), b"{not json").unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "index.json"); +} + +#[test] +fn empty_index_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + idx["manifests"] = Value::Array(Vec::new()); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "no manifest tagged"); +} + +#[test] +fn from_oci_rejects_duplicate_tag_in_index() { + // A valid OCI layout has unique tags. Two manifests sharing the + // same `org.opencontainers.image.ref.name` annotation is + // malformed and from_oci must refuse rather than silently + // pick one. 
+ let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + let first = idx["manifests"][0].clone(); + idx["manifests"].as_array_mut().unwrap().push(first); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "multiple manifests tagged"); +} + +#[test] +fn missing_manifest_blob_rejected() { + let (_dir, path) = save_for_mutation(); + std::fs::remove_file(manifest_path(&path)).unwrap(); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("open") || msg.contains("No such") || msg.contains("not found"), + "expected missing-manifest error, got: {}", + msg + ); +} + +#[test] +fn bad_digest_format_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_index(&path, |idx| { + // Strip the algorithm prefix entirely. `oci-spec` validates + // descriptor digests on parse, so the index parser rejects + // this before our own digest helper sees it. + idx["manifests"][0]["digest"] = Value::from("deadbeef"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("digest") || msg.contains("index.json"), + "expected digest or parse error, got: {}", + msg + ); +} + +#[test] +fn malformed_manifest_json_rejected() { + // Probes the manifest JSON parser. Under `from_oci`, the + // digest-verification step would fire first and short-circuit + // this; that path is covered by + // `from_oci_rejects_manifest_blob_byte_mutation`. Use + // `from_oci_unchecked` here to reach the parser. + let (_dir, path) = save_for_mutation(); + let mp = manifest_path(&path); + std::fs::write(&mp, b"{not json").unwrap(); + // Update index size to match so we hit the JSON parser, not the + // size check. 
+ let new_len = std::fs::metadata(&mp).unwrap().len(); + rewrite_index(&path, |idx| { + idx["manifests"][0]["size"] = Value::from(new_len); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "manifest"); +} + +#[test] +fn wrong_manifest_schema_version_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["schemaVersion"] = Value::from(99u32); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "schemaVersion"); +} + +#[test] +fn unknown_config_media_type_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["config"]["mediaType"] = Value::from("application/vnd.example.unknown.v1+json"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "config media type"); +} + +#[test] +fn empty_layers_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["layers"] = Value::Array(Vec::new()); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "layer"); +} + +#[test] +fn extra_layers_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + let first = m["layers"][0].clone(); + m["layers"].as_array_mut().unwrap().push(first); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "layer"); +} + +#[test] +fn unknown_snapshot_layer_media_type_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_manifest(&path, |m| { + m["layers"][0]["mediaType"] = Value::from("application/vnd.example.unknown.v1"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "snapshot layer media type"); +} + +/// Manifest- and index-level annotations injected by third-party +/// tools (cosign, ORAS, build pipelines, etc.) must NOT break load. 
+/// `OciSnapshotConfig` is intentionally strict (`deny_unknown_fields`) but +/// the OCI envelope around it is parsed via `oci-spec`'s lenient +/// types. +#[test] +fn manifest_and_index_annotations_tolerated() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + rewrite_manifest(&path, |m| { + let mut anns = serde_json::Map::new(); + anns.insert( + "org.opencontainers.image.created".to_string(), + Value::from("2024-01-01T00:00:00Z"), + ); + anns.insert( + "dev.sigstore.cosign/signature".to_string(), + Value::from("MEUCIQDsignature"), + ); + m["annotations"] = Value::Object(anns); + }); + rewrite_index(&path, |idx| { + let mut anns = serde_json::Map::new(); + anns.insert( + "org.opencontainers.image.ref.name".to_string(), + Value::from("v1.2.3"), + ); + idx["annotations"] = Value::Object(anns); + }); + + let loaded = Snapshot::from_oci(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +#[test] +fn config_blob_size_descriptor_mismatch_rejected() { + let (_dir, path) = save_for_mutation(); + // Bump the config descriptor's claimed size by one without + // touching the actual blob. + rewrite_manifest(&path, |m| { + let sz = m["config"]["size"].as_u64().unwrap(); + m["config"]["size"] = Value::from(sz + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "config blob size mismatch"); +} + +#[test] +fn malformed_config_json_rejected() { + // Probes the config JSON parser. Under `from_oci` the + // digest-verification step would fire first; that path is + // covered by `from_oci_rejects_config_blob_byte_mutation`. + // Use `from_oci_unchecked` here to reach the parser. 
+ let (_dir, path) = save_for_mutation(); + let cfg_path = find_config_blob(&path); + std::fs::write(&cfg_path, b"{not json").unwrap(); + // Update both the manifest's config descriptor size and the + // index's manifest descriptor size to match so we reach the + // JSON parser, not the size check. + let new_cfg_len = std::fs::metadata(&cfg_path).unwrap().len(); + rewrite_manifest(&path, |m| { + m["config"]["size"] = Value::from(new_cfg_len); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + assert_err_contains(err, "config JSON"); +} + +#[test] +fn memory_size_zero_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + cfg["memory_size"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "memory_size"); +} + +#[test] +fn memory_size_unaligned_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let sz = cfg["memory_size"].as_u64().unwrap(); + cfg["memory_size"] = Value::from(sz + 1); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + let msg = format!("{}", err); + // Either the page-alignment check or the file-size check trips. + // Both are valid signals that the value was rejected. + assert!( + msg.contains("memory_size") || msg.contains("PAGE_SIZE") || msg.contains("size"), + "expected memory_size rejection, got: {}", + msg + ); +} + +#[test] +fn bad_init_data_permissions_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + // 1u32 << 31 is well outside the defined READ|WRITE|EXECUTE bits. + cfg["layout"]["init_data_permissions"] = Value::from(0x8000_0000u32); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "init_data_permissions"); +} + +#[test] +fn entrypoint_addr_outside_snapshot_region_rejected() { + // A crafted config can claim any u64 as the entry point. 
The + // loader must refuse addresses that don't lie within + // [BASE_ADDRESS, BASE_ADDRESS + snapshot_size) so a malicious + // image can't direct execution into unmapped GPA space or into + // the writable scratch region. + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + // 0xDEAD_BEEF_0000 is far above any plausible snapshot + // region (snapshot_size is bounded by MAX_MEMORY_SIZE, + // ~16 GiB) and outside guest mapped memory. + entry["addr"] = Value::from(0xDEAD_BEEF_0000u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "entrypoint addr"); +} + +#[test] +fn entrypoint_addr_below_base_address_rejected() { + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + let entry = cfg["entrypoint"].as_object_mut().unwrap(); + // 0 is below BASE_ADDRESS (0x1000); rejected as "outside the + // snapshot region". + entry["addr"] = Value::from(0u64); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "entrypoint addr"); +} + +// ============================================================================= +// `from_oci_unchecked`: skips blob digest verification but still runs +// every other validator (OCI structure, descriptor sizes, schema +// versions, arch / hypervisor / ABI tags, layout bounds, entrypoint +// bounds). 
+// ============================================================================= + +#[test] +fn from_oci_unchecked_round_trips() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let loaded = Snapshot::from_oci_unchecked(&path, "latest").unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + let result: String = sbox2.call("Echo", "hi\n".to_string()).unwrap(); + assert_eq!(result, "hi\n"); +} + +#[test] +fn from_oci_unchecked_still_validates_config_fields() { + // Field-level validators (arch, abi, hypervisor, layout bounds, + // entrypoint bounds) must still fire under `from_oci_unchecked`. + // Use `rewrite_config` so the layout stays self-consistent + // (otherwise the checked path would also catch this via the + // descriptor-size check before the field validator runs). + let (_dir, path) = save_for_mutation(); + rewrite_config(&path, |cfg| { + cfg["arch"] = Value::from("aarch64"); + }); + let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest")); + let msg = format!("{}", err); + assert!( + msg.contains("architecture") || msg.contains("arch"), + "expected architecture mismatch under from_oci_unchecked, got: {}", + msg + ); +} + +#[test] +fn from_oci_rejects_manifest_blob_byte_mutation() { + // Mutate a manifest body byte (without updating the index's + // descriptor digest) and confirm the loader catches it via + // digest verification before any of the field-level manifest + // validators (schema version, media type, etc.) run. + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("snap"); + snapshot.to_oci(&path, "latest").unwrap(); + + let mp = manifest_path(&path); + let mut bytes = std::fs::read(&mp).unwrap(); + // Flip the first byte. 
Length is preserved so the descriptor + // size check still passes; only digest verification can detect + // this. The byte will also break JSON parsing, but the digest + // check fires first. + bytes[0] ^= 0x20; + std::fs::write(&mp, &bytes).unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest")); + assert_err_contains(err, "digest mismatch"); +} + +#[test] +fn from_oci_unknown_tag_lists_available_tags() { + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "alpha").unwrap(); + + let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "missing")); + let msg = format!("{}", err); + assert!( + msg.contains("no manifest tagged") && msg.contains("\"missing\""), + "expected unknown-tag error mentioning the requested tag, got: {}", + msg + ); + assert!( + msg.contains("alpha"), + "expected available-tags listing to include the actual tag, got: {}", + msg + ); +} + +#[test] +fn manifest_descriptor_carries_ref_name_annotation() { + // The OCI standard tag annotation must be set on the manifest + // descriptor in `index.json` so external tools (`oras`, + // `crane manifest`, `skopeo inspect`) see the tag. + let snap = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("layout"); + snap.to_oci(&path, "production-v3").unwrap(); + + let index: Value = + serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap(); + let manifest = &index["manifests"][0]; + assert_eq!( + manifest["annotations"]["org.opencontainers.image.ref.name"] + .as_str() + .unwrap(), + "production-v3" + ); +} + +// ============================================================================= +// Tag validation. 
+// =============================================================================
+
+#[test]
+fn empty_tag_rejected_on_save() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let err = snap.to_oci(dir.path().join("snap"), "").unwrap_err();
+    assert!(format!("{err}").contains("tag"));
+}
+
+#[test]
+fn empty_tag_rejected_on_load() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("snap");
+    snap.to_oci(&path, "latest").unwrap();
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, ""));
+    assert!(format!("{err}").contains("tag"));
+}
+
+#[test]
+fn tag_with_illegal_leading_char_rejected() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let err = snap
+        .to_oci(dir.path().join("snap"), ".dotleader")
+        .unwrap_err();
+    assert!(format!("{err}").contains("tag"));
+
+    let err = snap
+        .to_oci(dir.path().join("snap"), "-dashleader")
+        .unwrap_err();
+    assert!(format!("{err}").contains("tag"));
+}
+
+#[test]
+fn tag_with_illegal_chars_rejected() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let err = snap
+        .to_oci(dir.path().join("snap"), "with/slash")
+        .unwrap_err();
+    assert!(format!("{err}").contains("tag"));
+
+    let err = snap
+        .to_oci(dir.path().join("snap"), "with space")
+        .unwrap_err();
+    assert!(format!("{err}").contains("tag"));
+}
+
+#[test]
+fn long_tag_within_limit_accepted() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let tag: String = "a".repeat(128);
+    snap.to_oci(dir.path().join("snap"), &tag).unwrap();
+    let _ = Snapshot::from_oci(dir.path().join("snap"), &tag).unwrap();
+}
+
+#[test]
+fn over_long_tag_rejected() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let tag: String = "a".repeat(129);
+    let err = snap.to_oci(dir.path().join("snap"), &tag).unwrap_err();
+    assert!(format!("{err}").contains("tag"));
+}
+
+// =============================================================================
+// Save-shape invariants. Verify the on-disk JSON we hand to standard
+// OCI tools matches what the spec prescribes.
+// =============================================================================
+
+#[test]
+fn manifest_descriptor_uses_image_manifest_media_type() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let index: Value =
+        serde_json::from_slice(&std::fs::read(path.join("index.json")).unwrap()).unwrap();
+    assert_eq!(
+        index["manifests"][0]["mediaType"].as_str().unwrap(),
+        "application/vnd.oci.image.manifest.v1+json"
+    );
+}
+
+#[test]
+fn manifest_descriptor_non_image_manifest_rejected() {
+    // The loader must refuse to follow a descriptor that does not
+    // advertise an OCI image manifest, even if the blob it points
+    // at would parse. This prevents misuse where an image index or
+    // an unrelated artifact has been published under our tag.
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    rewrite_index(&path, |idx| {
+        idx["manifests"][0]["mediaType"] = Value::from("application/vnd.oci.image.index.v1+json");
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest"));
+    let msg = format!("{}", err);
+    assert!(
+        msg.contains("unexpected media type"),
+        "expected manifest-descriptor media type error, got: {}",
+        msg
+    );
+}
+
+#[test]
+fn manifest_uses_correct_config_and_layer_media_types() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let manifest: Value =
+        serde_json::from_slice(&std::fs::read(manifest_path(&path)).unwrap()).unwrap();
+    assert_eq!(
+        manifest["config"]["mediaType"].as_str().unwrap(),
+        "application/vnd.hyperlight.snapshot.config.v1+json"
+    );
+    assert_eq!(manifest["layers"].as_array().unwrap().len(), 1);
+    assert_eq!(
+        manifest["layers"][0]["mediaType"].as_str().unwrap(),
+        "application/vnd.hyperlight.snapshot.memory.v1"
+    );
+    // `artifactType` mirrors `config.mediaType` so registries that surface
+    // the distribution-spec referrers API report a useful type, and tooling
+    // that predates `artifactType` and falls back to `config.mediaType`
+    // sees the same value.
+    assert_eq!(
+        manifest["artifactType"].as_str().unwrap(),
+        "application/vnd.hyperlight.snapshot.config.v1+json"
+    );
+}
+
+#[test]
+fn manifest_missing_artifact_type_rejected() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    rewrite_manifest(&path, |m| {
+        m.as_object_mut().unwrap().remove("artifactType");
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest"));
+    assert_err_contains(err, "missing required artifactType");
+}
+
+#[test]
+fn manifest_mismatched_artifact_type_rejected() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    rewrite_manifest(&path, |m| {
+        m["artifactType"] = Value::from("application/vnd.example.bogus.v1+json");
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest"));
+    assert_err_contains(err, "does not match config media type");
+}
+
+#[test]
+fn save_writes_oci_layout_marker() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let marker: Value =
+        serde_json::from_slice(&std::fs::read(path.join("oci-layout")).unwrap()).unwrap();
+    assert_eq!(marker["imageLayoutVersion"].as_str().unwrap(), "1.0.0");
+}
+
+// =============================================================================
+// Tag selection edge cases.
+// =============================================================================
+
+#[test]
+fn tag_lookup_is_case_sensitive() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "MyTag").unwrap();
+
+    // Different case must NOT match.
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "mytag"));
+    assert_err_contains(err, "no manifest tagged");
+
+    // Exact case loads.
+    let _ = Snapshot::from_oci(&path, "MyTag").unwrap();
+}
+
+#[test]
+fn ref_name_annotation_key_is_case_sensitive() {
+    // If the index uses a differently-cased annotation key (e.g.
+    // `org.OpenContainers.image.ref.name`), the manifest is treated
+    // as untagged and from_oci must not load it under any name.
+    let (_dir, path) = save_for_mutation();
+    rewrite_index(&path, |idx| {
+        let anns = idx["manifests"][0]["annotations"].as_object_mut().unwrap();
+        let value = anns.remove("org.opencontainers.image.ref.name").unwrap();
+        anns.insert("org.OpenContainers.image.ref.name".to_string(), value);
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest"));
+    assert_err_contains(err, "no manifest tagged");
+}
+
+#[test]
+fn tag_with_all_valid_special_chars_accepted() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    let tag = "v1.2.3-rc.1_build";
+    snap.to_oci(&path, tag).unwrap();
+    let _ = Snapshot::from_oci(&path, tag).unwrap();
+}
+
+#[test]
+fn other_descriptor_annotations_do_not_interfere() {
+    // A manifest descriptor with the standard ref.name annotation
+    // PLUS unrelated annotations (cosign signatures, build
+    // pipelines, etc.) must still resolve by tag.
+    let (_dir, path) = save_for_mutation();
+    rewrite_index(&path, |idx| {
+        let anns = idx["manifests"][0]["annotations"].as_object_mut().unwrap();
+        anns.insert(
+            "dev.sigstore.cosign/signature".to_string(),
+            Value::from("MEUCIQDfake"),
+        );
+        anns.insert("io.example.build.id".to_string(), Value::from("12345"));
+    });
+    let _ = Snapshot::from_oci(&path, "latest").unwrap();
+}
+
+// =============================================================================
+// Bad sha256 digest format on the inner descriptors (config and snapshot
+// layer).
The index-side equivalent is `bad_digest_format_rejected`.
+// =============================================================================
+
+#[test]
+fn bad_config_descriptor_digest_format_rejected() {
+    let (_dir, path) = save_for_mutation();
+    rewrite_manifest(&path, |m| {
+        m["config"]["digest"] = Value::from("md5:deadbeef");
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest"));
+    let msg = format!("{err}");
+    assert!(
+        msg.contains("digest"),
+        "expected digest-format error, got: {msg}"
+    );
+}
+
+#[test]
+fn bad_snapshot_layer_descriptor_digest_format_rejected() {
+    let (_dir, path) = save_for_mutation();
+    rewrite_manifest(&path, |m| {
+        m["layers"][0]["digest"] = Value::from("sha256:tooshort");
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest"));
+    let msg = format!("{err}");
+    assert!(
+        msg.contains("digest"),
+        "expected digest-format error, got: {msg}"
+    );
+}
+
+// =============================================================================
+// Missing inner blobs.
+// =============================================================================
+
+#[test]
+fn missing_config_blob_rejected() {
+    let (_dir, path) = save_for_mutation();
+    let cfg_path = find_config_blob(&path);
+    std::fs::remove_file(&cfg_path).unwrap();
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest"));
+    let msg = format!("{err}");
+    assert!(
+        msg.contains("open") || msg.contains("No such") || msg.contains("not found"),
+        "expected missing-config-blob error, got: {msg}"
+    );
+}
+
+// =============================================================================
+// Size-bound enforcement.
+// =============================================================================
+
+#[test]
+fn manifest_blob_too_large_rejected() {
+    // The manifest reader bounds to 1 MiB. Replace the manifest
+    // with junk longer than that and confirm the bound trips
+    // before any parsing.
+    let (_dir, path) = save_for_mutation();
+    let mp = manifest_path(&path);
+    let huge = vec![b'a'; (1024 * 1024 + 16) as usize];
+    std::fs::write(&mp, &huge).unwrap();
+    // Update descriptor size to match so we hit the bound check,
+    // not the size mismatch check.
+    rewrite_index(&path, |idx| {
+        idx["manifests"][0]["size"] = Value::from(huge.len() as u64);
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest"));
+    assert_err_contains(err, "exceeds maximum allowed");
+}
+
+#[test]
+fn config_blob_too_large_rejected() {
+    let (_dir, path) = save_for_mutation();
+    let cfg_path = find_config_blob(&path);
+    let huge = vec![b'a'; (1024 * 1024 + 16) as usize];
+    std::fs::write(&cfg_path, &huge).unwrap();
+    rewrite_manifest(&path, |m| {
+        m["config"]["size"] = Value::from(huge.len() as u64);
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest"));
+    assert_err_contains(err, "exceeds maximum allowed");
+}
+
+#[test]
+fn memory_size_too_large_rejected() {
+    let (_dir, path) = save_for_mutation();
+    rewrite_config(&path, |cfg| {
+        // 16 GiB exceeds MAX_MEMORY_SIZE.
+        cfg["memory_size"] = Value::from(16u64 * 1024 * 1024 * 1024);
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci(&path, "latest"));
+    assert_err_contains(err, "memory_size");
+}
+
+#[test]
+fn snapshot_descriptor_size_disagrees_with_file_rejected() {
+    // Snapshot descriptor claims a different size than the actual
+    // blob file. The loader must reject before mmap-ing.
+    let (_dir, path) = save_for_mutation();
+    rewrite_manifest(&path, |m| {
+        let sz = m["layers"][0]["size"].as_u64().unwrap();
+        m["layers"][0]["size"] = Value::from(sz + 1);
+    });
+    let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest"));
+    let msg = format!("{err}");
+    assert!(
+        msg.contains("snapshot blob size"),
+        "expected snapshot-blob descriptor disagreement error, got: {msg}"
+    );
+}
+
+// =============================================================================
+// `from_oci_unchecked` shares the same non-digest validators with
+// `from_oci`. The key safety claim of the unchecked path is that it
+// is faster, NOT that it is more permissive about anything other
+// than digest checks. Pin that contract down here.
+// =============================================================================
+
+#[test]
+fn from_oci_unchecked_validates_tag_format() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "bad/tag"));
+    assert_err_contains(err, "tag");
+}
+
+#[test]
+fn from_oci_unchecked_rejects_unknown_tag() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "nosuch"));
+    assert_err_contains(err, "no manifest tagged");
+}
+
+#[test]
+fn from_oci_unchecked_rejects_path_not_directory() {
+    let dir = tempfile::tempdir().unwrap();
+    let file_path = dir.path().join("not-a-dir");
+    std::fs::write(&file_path, b"hi").unwrap();
+    let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&file_path, "latest"));
+    assert_err_contains(err, "not a directory");
+}
+
+#[test]
+fn from_oci_unchecked_rejects_missing_oci_layout_marker() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    std::fs::remove_file(path.join("oci-layout")).unwrap();
+    let err = unwrap_err_snapshot(Snapshot::from_oci_unchecked(&path, "latest"));
+    assert_err_contains(err, "oci-layout");
+}
+
+// =============================================================================
+// Round-trip data fidelity.
+//
+// The serde shape tests already prove individual fields parse, but
+// they don't prove that all the values that came out of the producer
+// reach the loaded snapshot. These tests pin down full round-trip
+// fidelity for fields that are not exercised by the
+// "load-then-call-the-guest" round-trip tests above.
+// =============================================================================
+
+#[test]
+fn round_trip_preserves_stack_top_gva() {
+    let mut sbox = create_test_sandbox();
+    let snap = sbox.snapshot().unwrap();
+    let original = snap.stack_top_gva();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
+    assert_eq!(loaded.stack_top_gva(), original);
+}
+
+#[test]
+fn round_trip_preserves_non_default_scratch_size() {
+    use crate::sandbox::SandboxConfiguration;
+    let mut cfg = SandboxConfiguration::default();
+    let custom_scratch: usize = 256 * 1024;
+    cfg.set_scratch_size(custom_scratch);
+    let snap = Snapshot::from_env(
+        GuestBinary::FilePath(simple_guest_as_string().unwrap()),
+        cfg,
+    )
+    .unwrap();
+    let original = snap.layout().get_scratch_size();
+    assert_eq!(original, custom_scratch);
+
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
+    assert_eq!(loaded.layout().get_scratch_size(), custom_scratch);
+}
+
+#[test]
+fn pre_init_snapshot_writes_initialise_entrypoint_kind() {
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let cfg: Value =
+        serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap();
+    assert_eq!(cfg["entrypoint"]["kind"].as_str().unwrap(), "initialise");
+    assert!(
+        cfg["entrypoint"].get("sregs").is_none(),
+        "Initialise snapshot must not carry sregs in the config"
+    );
+}
+
+#[test]
+fn already_initialised_snapshot_writes_call_entrypoint_kind() {
+    let mut sbox = create_test_sandbox();
+    let snap = sbox.snapshot().unwrap();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+    let cfg: Value =
+        serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap();
+    assert_eq!(cfg["entrypoint"]["kind"].as_str().unwrap(), "call");
+    assert!(
+        cfg["entrypoint"]["sregs"].is_object(),
+        "Call snapshot must carry sregs in the config"
+    );
+}
+
+#[test]
+fn round_trip_preserves_host_function_signatures() {
+    // Save a snapshot with a custom host function signature, load
+    // it, and confirm the recorded signatures survive.
+    let mut sbox = create_sandbox_with_custom_host_funcs();
+    let snap = sbox.snapshot().unwrap();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+
+    let cfg: Value =
+        serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap();
+    let funcs = cfg["host_functions"].as_array().unwrap();
+    let add = funcs
+        .iter()
+        .find(|f| f["function_name"].as_str().unwrap() == "Add")
+        .expect("Add must be recorded");
+    assert_eq!(
+        add["parameter_types"].as_array().unwrap().len(),
+        2,
+        "Add signature must record two parameters"
+    );
+    // Loading and using the snapshot must accept the same signature.
+    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
+    let _ = MultiUseSandbox::from_snapshot(Arc::new(loaded), host_funcs_with_matching_add(), None)
+        .unwrap();
+}
+
+#[test]
+fn snapshot_with_no_host_functions_round_trips() {
+    // A snapshot with `host_functions: []` must round-trip without
+    // confusing the loader (which has special handling for the
+    // empty-vs-None case).
+    let snap = create_snapshot_from_binary();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "latest").unwrap();
+
+    let cfg: Value =
+        serde_json::from_slice(&std::fs::read(find_config_blob(&path)).unwrap()).unwrap();
+    assert!(
+        cfg["host_functions"].as_array().unwrap().is_empty(),
+        "expected empty host_functions array for pre-init snapshot"
+    );
+
+    // The default HostFunctions set is sufficient because the
+    // snapshot requires nothing.
+    let loaded = Snapshot::from_oci(&path, "latest").unwrap();
+    let _ =
+        MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap();
+}
+
+// =============================================================================
+// Snapshot lineage and restore semantics.
+//
+// Hyperlight's snapshot model is NOT a tree. Each `MultiUseSandbox`
+// has a process-local `sandbox_id`; `snapshot()` tags the snapshot
+// with that id; `from_snapshot(snap)` adopts `snap.sandbox_id()` so
+// the new sandbox can restore back to it; and `restore(snap)`
+// requires `self.id == snap.sandbox_id()`. So sandboxes built from
+// clones of the same `Arc<Snapshot>` form a flat id-equivalence
+// class within which restore is freely interchangeable.
+//
+// These tests pin down all the combinations of build-from-snapshot,
+// take-more-snapshots, restore-out-of-order, and reject-across-class
+// that follow from that model.
+// =============================================================================
+
+#[test]
+fn linear_chain_restore_in_order() {
+    // Take three snapshots at different states in one sandbox, then
+    // restore to each in chronological order. After each restore,
+    // the static counter must read the value it had when that
+    // snapshot was taken.
+    let mut sbox = create_test_sandbox();
+    let s0 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 10i32).unwrap();
+    let s10 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 20i32).unwrap();
+    let s30 = sbox.snapshot().unwrap();
+
+    sbox.restore(s0.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+    sbox.restore(s10.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 10);
+    sbox.restore(s30.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 30);
+}
+
+#[test]
+fn linear_chain_restore_out_of_order() {
+    // Restore through the same chain but in a non-monotonic order
+    // (forward, back, forward, back). Snapshots within one
+    // id-equivalence class are NOT ordered by when they were
+    // taken: any can be restored to from any other.
+    let mut sbox = create_test_sandbox();
+    let s0 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 7i32).unwrap();
+    let s7 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 100i32).unwrap();
+    let s107 = sbox.snapshot().unwrap();
+
+    let order = [&s107, &s0, &s7, &s107, &s0];
+    let expected = [107, 0, 7, 107, 0];
+    for (snap, want) in order.iter().zip(expected.iter()) {
+        sbox.restore((*snap).clone()).unwrap();
+        assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), *want);
+    }
+}
+
+#[test]
+fn restore_then_call_then_snapshot_then_restore() {
+    // Restore changes the live state, but it must NOT invalidate
+    // the snapshot that was just used. After restoring to S1, the
+    // sandbox can still take a new snapshot and restore back to
+    // either S1 or the new one.
+    let mut sbox = create_test_sandbox();
+    let s_init = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 4i32).unwrap();
+
+    // Restore back to init.
+    sbox.restore(s_init.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+
+    // Mutate again, snapshot, mutate further.
+    sbox.call::<i32>("AddToStatic", 9i32).unwrap();
+    let s_post_restore = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 100i32).unwrap();
+
+    // Restore to either reachable snapshot.
+    sbox.restore(s_post_restore.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 9);
+    sbox.restore(s_init.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+}
+
+#[test]
+fn restore_idempotent() {
+    // Restoring to the same snapshot twice in a row must produce
+    // the same observable state both times.
+    let mut sbox = create_test_sandbox();
+    sbox.call::<i32>("AddToStatic", 11i32).unwrap();
+    let s = sbox.snapshot().unwrap();
+
+    sbox.call::<i32>("AddToStatic", 22i32).unwrap();
+    sbox.restore(s.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 11);
+
+    // No mutation between restores.
+    sbox.restore(s.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 11);
+
+    // Mutation after the second restore must take effect.
+    sbox.call::<i32>("AddToStatic", 1i32).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 12);
+}
+
+#[test]
+fn from_snapshot_then_snapshot_then_restore_to_both() {
+    // Build sandbox B from snapshot S0 (B inherits S0's id).
+    // B takes its own snapshot S1 (also tagged with S0's id). Both
+    // S0 and S1 must be reachable from B via `restore`.
+    //
+    // Note: only snapshots taken from a RUNNING sandbox (with
+    // sregs) are valid restore targets. We therefore start from a
+    // snapshot of a running sandbox, not a pre-init snapshot.
+    let mut seed = create_test_sandbox();
+    let s0 = seed.snapshot().unwrap();
+
+    let mut b = MultiUseSandbox::from_snapshot(s0.clone(), HostFunctions::default(), None).unwrap();
+    b.call::<i32>("AddToStatic", 5i32).unwrap();
+    let s1 = b.snapshot().unwrap();
+    b.call::<i32>("AddToStatic", 10i32).unwrap();
+
+    // Restore back to S1.
+    b.restore(s1.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 5);
+
+    // Restore back further to the constructor snapshot S0.
+    b.restore(s0.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 0);
+}
+
+#[test]
+fn arc_clone_lineage_two_sandboxes_each_restores_to_either() {
+    // Two sandboxes built from the SAME Arc<Snapshot> share the
+    // sandbox_id. Each takes its own snapshot. Each must be
+    // restorable to (a) its own derived snapshot, (b) the shared
+    // root snapshot, and (c) the OTHER sandbox's derived snapshot
+    // (because all four snapshots share one id).
+    //
+    // Note: the shared root must be a running-sandbox snapshot so
+    // that restore() can use its sregs.
+    let mut seed = create_test_sandbox();
+    let snap_root = seed.snapshot().unwrap();
+
+    let mut a =
+        MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap();
+    let mut b =
+        MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap();
+
+    a.call::<i32>("AddToStatic", 3i32).unwrap();
+    let snap_a = a.snapshot().unwrap();
+
+    b.call::<i32>("AddToStatic", 70i32).unwrap();
+    let snap_b = b.snapshot().unwrap();
+
+    // a: own snap then root then b's snap.
+    a.restore(snap_a.clone()).unwrap();
+    assert_eq!(a.call::<i32>("GetStatic", ()).unwrap(), 3);
+    a.restore(snap_root.clone()).unwrap();
+    assert_eq!(a.call::<i32>("GetStatic", ()).unwrap(), 0);
+    a.restore(snap_b.clone()).unwrap();
+    assert_eq!(a.call::<i32>("GetStatic", ()).unwrap(), 70);
+
+    // b: cross-restore the other way.
+    b.restore(snap_a.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 3);
+    b.restore(snap_root.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 0);
+    b.restore(snap_b.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 70);
+}
+
+#[test]
+fn separate_from_snapshot_calls_share_id_class_through_lineage() {
+    // Build sandbox A from a running-sandbox snapshot snap_root.
+    // A takes snap_a. Then build sandbox B from snap_a (a different
+    // Arc, but B adopts snap_a.sandbox_id == snap_root.sandbox_id).
+    // B must be restorable to BOTH snap_a and snap_root because
+    // they all share one id.
+    let mut seed = create_test_sandbox();
+    let snap_root = seed.snapshot().unwrap();
+
+    let mut a =
+        MultiUseSandbox::from_snapshot(snap_root.clone(), HostFunctions::default(), None).unwrap();
+    a.call::<i32>("AddToStatic", 5i32).unwrap();
+    let snap_a = a.snapshot().unwrap();
+
+    let mut b =
+        MultiUseSandbox::from_snapshot(snap_a.clone(), HostFunctions::default(), None).unwrap();
+    b.restore(snap_a.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 5);
+    b.restore(snap_root.clone()).unwrap();
+    assert_eq!(b.call::<i32>("GetStatic", ()).unwrap(), 0);
+}
+
+#[test]
+fn separate_oci_loads_are_mutually_restore_compatible() {
+    // Each `from_oci` call rehydrates a structurally identical
+    // snapshot. Compatibility is determined by memory layout and
+    // host-function set, so a sandbox built from one load accepts
+    // a snapshot from any other load of the same image.
+    let mut seed = create_test_sandbox();
+    let snap = seed.snapshot().unwrap();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "v1").unwrap();
+
+    let s_x = Arc::new(Snapshot::from_oci(&path, "v1").unwrap());
+    let s_y = Arc::new(Snapshot::from_oci(&path, "v1").unwrap());
+
+    let mut sbox_x =
+        MultiUseSandbox::from_snapshot(s_x.clone(), HostFunctions::default(), None).unwrap();
+    sbox_x.restore(s_y.clone()).unwrap();
+    assert_eq!(sbox_x.call::<i32>("GetStatic", ()).unwrap(), 0);
+
+    sbox_x.restore(s_x.clone()).unwrap();
+    assert_eq!(sbox_x.call::<i32>("GetStatic", ()).unwrap(), 0);
+}
+
+#[test]
+fn oci_loaded_snapshot_supports_full_lifecycle() {
+    // Full round-trip: save (from a running sandbox so the loaded
+    // snapshot is a valid restore target), load, build sandbox,
+    // mutate, snapshot, mutate, restore, mutate, snapshot, restore.
+    // Both pre- and post-load snapshots in the loaded id class must
+    // remain restore-compatible across an arbitrary number of
+    // cycles.
+    let mut seed = create_test_sandbox();
+    let snap = seed.snapshot().unwrap();
+    let dir = tempfile::tempdir().unwrap();
+    let path = dir.path().join("layout");
+    snap.to_oci(&path, "v1").unwrap();
+
+    let loaded = Arc::new(Snapshot::from_oci(&path, "v1").unwrap());
+    let mut sbox =
+        MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap();
+
+    sbox.call::<i32>("AddToStatic", 1i32).unwrap();
+    let s1 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 2i32).unwrap();
+    let s3 = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 4i32).unwrap();
+
+    sbox.restore(s1.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 1);
+    sbox.restore(s3.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 3);
+    sbox.restore(loaded.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+
+    // Take a fresh snapshot post-restore. It is in the same id
+    // class and remains interchangeable with the others.
+    let s_post = sbox.snapshot().unwrap();
+    sbox.call::<i32>("AddToStatic", 50i32).unwrap();
+    sbox.restore(s_post.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+    sbox.restore(s3.clone()).unwrap();
+    assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 3);
+}
+
+#[test]
+fn restore_invariant_under_intermediate_mutations() {
+    // Restoring to S followed by an arbitrary number of
+    // mutate-then-restore cycles must always produce the same
+    // post-restore observable state. This is the core "snapshot
+    // and restore really mean what they say" property.
+    let mut sbox = create_test_sandbox();
+    sbox.call::<i32>("AddToStatic", 13i32).unwrap();
+    let s = sbox.snapshot().unwrap();
+
+    let mutations = [3, 5, 7, 11, 13, 17, 19];
+    for m in mutations {
+        sbox.call::<i32>("AddToStatic", m).unwrap();
+        sbox.restore(s.clone()).unwrap();
+        assert_eq!(
+            sbox.call::<i32>("GetStatic", ()).unwrap(),
+            13,
+            "restore must reset to the snapshotted value regardless of intermediate mutation {m}"
+        );
+    }
+}
+
+#[test]
+fn many_arc_clones_one_snapshot_share_id() {
+    // Cloning Arc<Snapshot> N times yields N references with
+    // identical sandbox_id. Each sandbox built from a clone shares
+    // the id and is mutually restore-compatible. Verifies that the
+    // id-equivalence-class semantics hold for arbitrary fan-out.
+    //
+    // The shared root must be a running-sandbox snapshot so the
+    // sandboxes can restore to it.
+    let mut seed = create_test_sandbox();
+    let snap = seed.snapshot().unwrap();
+    let mut sandboxes: Vec<MultiUseSandbox> = (0..4)
+        .map(|_| {
+            MultiUseSandbox::from_snapshot(snap.clone(), HostFunctions::default(), None).unwrap()
+        })
+        .collect();
+
+    // Each sandbox takes its own derived snapshot tagged with a
+    // unique value.
+    let mut snaps: Vec<Arc<Snapshot>> = Vec::new();
+    for (i, s) in sandboxes.iter_mut().enumerate() {
+        s.call::<i32>("AddToStatic", (i as i32 + 1) * 10).unwrap();
+        snaps.push(s.snapshot().unwrap());
+    }
+
+    // Every sandbox can restore to every snapshot in the class.
+    for (i, sbox) in sandboxes.iter_mut().enumerate() {
+        for (j, target) in snaps.iter().enumerate() {
+            sbox.restore(target.clone()).unwrap();
+            let want = (j as i32 + 1) * 10;
+            assert_eq!(
+                sbox.call::<i32>("GetStatic", ()).unwrap(),
+                want,
+                "sandbox {i} restored to snapshot {j} should observe value {want}"
+            );
+        }
+        // And to the root snapshot.
+        sbox.restore(snap.clone()).unwrap();
+        assert_eq!(sbox.call::<i32>("GetStatic", ()).unwrap(), 0);
+    }
+}

From 4cb8124713a11a098fc5df99b55763b07749c2ea Mon Sep 17 00:00:00 2001
From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
Date: Thu, 30 Apr 2026 18:54:14 -0700
Subject: [PATCH 3/3] Add OCI snapshot benchmarks

Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
---
 src/hyperlight_host/benches/benchmarks.rs | 143 +++++++++++++++++++++-
 1 file changed, 142 insertions(+), 1 deletion(-)

diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs
index 462e8908d..0f9ca5b2a 100644
--- a/src/hyperlight_host/benches/benchmarks.rs
+++ b/src/hyperlight_host/benches/benchmarks.rs
@@ -153,6 +153,15 @@ fn sandbox_lifecycle_benchmark(c: &mut Criterion) {
         );
     }
 
+    // Isolates the cost of building a MultiUseSandbox from an
+    // already-resident Snapshot. The Snapshot is loaded outside the
+    // timed region.
+ for size in SandboxSize::all() { + group.bench_function(format!("sandbox_from_snapshot/{}", size.name()), |b| { + bench_sandbox_from_snapshot(b, size) + }); + } + group.finish(); } @@ -347,6 +356,25 @@ fn bench_snapshot_restore(b: &mut criterion::Bencher, size: SandboxSize) { }); } +fn bench_sandbox_from_snapshot(b: &mut criterion::Bencher, size: SandboxSize) { + use hyperlight_host::HostFunctions; + use hyperlight_host::sandbox::snapshot::Snapshot; + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("bench"); + { + let mut sbox = create_multiuse_sandbox_with_size(size); + let snapshot = sbox.snapshot().unwrap(); + snapshot.to_oci(&snap_path, "latest").unwrap(); + } + let loaded = std::sync::Arc::new(Snapshot::from_oci(&snap_path, "latest").unwrap()); + + b.iter(|| { + let _ = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + }); +} + fn snapshots_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("snapshots"); @@ -551,6 +579,118 @@ fn shared_memory_benchmark(c: &mut Criterion) { group.finish(); } +// ============================================================================ +// Benchmark Category: Snapshot Files +// ============================================================================ + +fn snapshot_file_benchmark(c: &mut Criterion) { + use hyperlight_host::HostFunctions; + use hyperlight_host::sandbox::snapshot::Snapshot; + + let mut group = c.benchmark_group("snapshot_files"); + + // Pre-create OCI snapshot images for all sizes. + let dirs: Vec<_> = SandboxSize::all() + .iter() + .map(|size| { + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join(size.name()); + let snapshot = { + let mut sbox = create_multiuse_sandbox_with_size(*size); + sbox.snapshot().unwrap() + }; + snapshot.to_oci(&snap_path, "latest").unwrap(); + (dir, snapshot, snap_path) + }) + .collect(); + + // Benchmark: save_snapshot. 
Wipe the layout between iterations + // so each save measures a fresh write rather than a tag-append. + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_dir = tempfile::tempdir().unwrap(); + let path = snap_dir.path().join("bench"); + let snapshot = &dirs[i].1; + group.bench_function(format!("save_snapshot/{}", size.name()), |b| { + b.iter_batched( + || { + let _ = std::fs::remove_dir_all(&path); + }, + |_| snapshot.to_oci(&path, "latest").unwrap(), + criterion::BatchSize::PerIteration, + ); + }); + } + + // Benchmark: load_snapshot (parse manifest + config + mmap blob). + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].2.clone(); + group.bench_function(format!("load_snapshot/{}", size.name()), |b| { + b.iter(|| { + let _ = Snapshot::from_oci(&snap_path, "latest").unwrap(); + }); + }); + } + + // Benchmark: load_snapshot_unchecked (skip blob digest verification). + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].2.clone(); + group.bench_function(format!("load_snapshot_unchecked/{}", size.name()), |b| { + b.iter(|| { + let _ = Snapshot::from_oci_unchecked(&snap_path, "latest").unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_evolve (new + evolve + call) + for size in SandboxSize::all() { + group.bench_function(format!("cold_start_via_evolve/{}", size.name()), |b| { + b.iter(|| { + let mut sbox = create_multiuse_sandbox_with_size(size); + sbox.call::("Echo", "hello\n".to_string()).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_snapshot (load + from_snapshot + call) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].2.clone(); + group.bench_function(format!("cold_start_via_snapshot/{}", size.name()), |b| { + b.iter(|| { + let loaded = Snapshot::from_oci(&snap_path, "latest").unwrap(); + let mut sbox = MultiUseSandbox::from_snapshot( + std::sync::Arc::new(loaded), + HostFunctions::default(), + None, + ) + .unwrap(); + 
sbox.call::("Echo", "hello\n".to_string()).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_snapshot_unchecked (load unchecked + from_snapshot + call) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].2.clone(); + group.bench_function( + format!("cold_start_via_snapshot_unchecked/{}", size.name()), + |b| { + b.iter(|| { + let loaded = Snapshot::from_oci_unchecked(&snap_path, "latest").unwrap(); + let mut sbox = MultiUseSandbox::from_snapshot( + std::sync::Arc::new(loaded), + HostFunctions::default(), + None, + ) + .unwrap(); + sbox.call::("Echo", "hello\n".to_string()).unwrap(); + }); + }, + ); + } + + group.finish(); +} + criterion_group! { name = benches; config = Criterion::default(); @@ -561,6 +701,7 @@ criterion_group! { guest_call_benchmark_large_param, function_call_serialization_benchmark, sample_workloads_benchmark, - shared_memory_benchmark + shared_memory_benchmark, + snapshot_file_benchmark } criterion_main!(benches);