From cdd13755a0f3cf5ec8d30499dff521dc4ae3a564 Mon Sep 17 00:00:00 2001 From: Marco Allegretti Date: Sun, 22 Feb 2026 18:36:15 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20karapace-schema=20=E2=80=94=20manifest?= =?UTF-8?q?=20v1,=20normalization,=20identity=20hashing,=20lock=20file=20v?= =?UTF-8?q?2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TOML manifest parsing with strict schema validation (deny_unknown_fields) - Deterministic normalization: sorted packages, deduplication, canonical JSON - Two-phase identity: preliminary (from manifest) and canonical (from lock) - Lock file v2: resolved packages with pinned versions, base image content digest - Dual lock verification: integrity (hash) and manifest intent (drift detection) - Built-in presets: dev, dev-rust, dev-python, gui-app, gaming, minimal - Blake3 256-bit hashing throughout --- crates/karapace-schema/Cargo.toml | 18 + .../karapace-schema/karapace-schema.cdx.json | 1229 +++++++++++++++++ crates/karapace-schema/src/identity.rs | 195 +++ crates/karapace-schema/src/lib.rs | 23 + crates/karapace-schema/src/lock.rs | 874 ++++++++++++ crates/karapace-schema/src/manifest.rs | 192 +++ crates/karapace-schema/src/normalize.rs | 224 +++ crates/karapace-schema/src/preset.rs | 143 ++ crates/karapace-schema/src/types.rs | 158 +++ 9 files changed, 3056 insertions(+) create mode 100644 crates/karapace-schema/Cargo.toml create mode 100644 crates/karapace-schema/karapace-schema.cdx.json create mode 100644 crates/karapace-schema/src/identity.rs create mode 100644 crates/karapace-schema/src/lib.rs create mode 100644 crates/karapace-schema/src/lock.rs create mode 100644 crates/karapace-schema/src/manifest.rs create mode 100644 crates/karapace-schema/src/normalize.rs create mode 100644 crates/karapace-schema/src/preset.rs create mode 100644 crates/karapace-schema/src/types.rs diff --git a/crates/karapace-schema/Cargo.toml b/crates/karapace-schema/Cargo.toml new file mode 100644 index 0000000..8f1c58d --- /dev/null +++ b/crates/karapace-schema/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "karapace-schema" +description = "Manifest parsing, normalization, identity hashing, and lock file for Karapace" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true + +[lints] +workspace = true + +[dependencies] +serde.workspace = true +serde_json.workspace = true +thiserror.workspace = true +toml.workspace = true +blake3.workspace = true +tempfile.workspace = true diff --git a/crates/karapace-schema/karapace-schema.cdx.json b/crates/karapace-schema/karapace-schema.cdx.json new file mode 100644 index 0000000..eb9820d --- /dev/null +++ b/crates/karapace-schema/karapace-schema.cdx.json @@ -0,0 +1,1229 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.3", + "version": 1, + "serialNumber": "urn:uuid:da413f6f-ad72-4e83-80b1-317abb231aa6", + "metadata": { + "timestamp": "2026-02-22T14:03:10.544344100Z", + "tools": [ + { + "vendor": "CycloneDX", + "name": "cargo-cyclonedx", + "version": "0.5.5" + } + ], + "component": { + "type": "library", + "bom-ref": "path+file:///home/lateuf/Projects/Karapace/crates/karapace-schema#0.1.0", + "name": "karapace-schema", + "version": "0.1.0", + "description": "Manifest parsing, normalization, identity hashing, and lock file for Karapace", + "scope": "required", + "licenses": [ + { + "expression": "EUPL-1.2" + } + ], + "purl": "pkg:cargo/karapace-schema@0.1.0?download_url=file://.", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/marcoallegretti/karapace" + } + ], + "components": [ + { + "type": "library", + "bom-ref": "path+file:///home/lateuf/Projects/Karapace/crates/karapace-schema#0.1.0 bin-target-0", + "name": "karapace_schema", + "version": "0.1.0", + "purl": "pkg:cargo/karapace-schema@0.1.0?download_url=file://.#src/lib.rs" + } + ] + } + }, + "components": [ + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#arrayref@0.3.9", + "name": "arrayref", + "version": "0.3.9", + "description": "Macros to take array references of slices", + "scope": "required", + "licenses": [ + { + "expression": "BSD-2-Clause" + } + ], + "purl": "pkg:cargo/arrayref@0.3.9", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/arrayref" + }, + { + "type": "vcs", + "url": "https://github.com/droundy/arrayref" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#arrayvec@0.7.6", + "name": "arrayvec", + "version": "0.7.6", + "description": "A vector with fixed capacity, backed by an array (it can be stored on the stack too). Implements fixed capacity ArrayVec and ArrayString.", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/arrayvec@0.7.6", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/arrayvec/" + }, + { + "type": "vcs", + "url": "https://github.com/bluss/arrayvec" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#bitflags@2.11.0", + "name": "bitflags", + "version": "2.11.0", + "description": "A macro to generate structures which behave like bitflags. ", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/bitflags@2.11.0", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/bitflags" + }, + { + "type": "website", + "url": "https://github.com/bitflags/bitflags" + }, + { + "type": "vcs", + "url": "https://github.com/bitflags/bitflags" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#blake3@1.8.3", + "name": "blake3", + "version": "1.8.3", + "description": "the BLAKE3 hash function", + "scope": "required", + "licenses": [ + { + "expression": "CC0-1.0 OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception" + } + ], + "purl": "pkg:cargo/blake3@1.8.3", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/blake3" + }, + { + "type": "vcs", + "url": "https://github.com/BLAKE3-team/BLAKE3" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#cc@1.2.56", + "name": "cc", + "version": "1.2.56", + "description": "A build-time dependency for Cargo build scripts to assist in invoking the native C compiler to compile native C code into a static archive to be linked into Rust code. ", + "scope": "excluded", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/cc@1.2.56", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/cc" + }, + { + "type": "website", + "url": "https://github.com/rust-lang/cc-rs" + }, + { + "type": "vcs", + "url": "https://github.com/rust-lang/cc-rs" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#cfg-if@1.0.4", + "name": "cfg-if", + "version": "1.0.4", + "description": "A macro to ergonomically define an item depending on a large number of #[cfg] parameters. Structured like an if-else chain, the first matching branch is the item that gets emitted. ", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/cfg-if@1.0.4", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/rust-lang/cfg-if" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#constant_time_eq@0.4.2", + "name": "constant_time_eq", + "version": "0.4.2", + "description": "Compares two equal-sized byte strings in constant time.", + "scope": "required", + "licenses": [ + { + "expression": "CC0-1.0 OR MIT-0 OR Apache-2.0" + } + ], + "purl": "pkg:cargo/constant_time_eq@0.4.2", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/constant_time_eq" + }, + { + "type": "vcs", + "url": "https://github.com/cesarb/constant_time_eq" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#cpufeatures@0.2.17", + "name": "cpufeatures", + "version": "0.2.17", + "description": "Lightweight runtime CPU feature detection for aarch64, loongarch64, and x86/x86_64 targets, with no_std support and support for mobile targets including Android and iOS ", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/cpufeatures@0.2.17", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/cpufeatures" + }, + { + "type": "vcs", + "url": "https://github.com/RustCrypto/utils" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#equivalent@1.0.2", + "name": "equivalent", + "version": "1.0.2", + "description": "Traits for key comparison in maps.", + "scope": "required", + "licenses": [ + { + "expression": "Apache-2.0 OR MIT" + } + ], + "purl": "pkg:cargo/equivalent@1.0.2", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/indexmap-rs/equivalent" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#errno@0.3.14", + "name": "errno", + "version": "0.3.14", + "description": "Cross-platform interface to the `errno` variable.", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/errno@0.3.14", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/errno" + }, + { + "type": "vcs", + "url": "https://github.com/lambda-fairy/rust-errno" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#fastrand@2.3.0", + "name": "fastrand", + "version": "2.3.0", + "description": "A simple and fast random number generator", + "scope": "required", + "licenses": [ + { + "expression": "Apache-2.0 OR MIT" + } + ], + "purl": "pkg:cargo/fastrand@2.3.0", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/smol-rs/fastrand" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#find-msvc-tools@0.1.9", + "name": "find-msvc-tools", + "version": "0.1.9", + "description": "Find windows-specific tools, read MSVC versions from the registry and from COM interfaces", + "scope": "excluded", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/find-msvc-tools@0.1.9", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/find-msvc-tools" + }, + { + "type": "vcs", + "url": "https://github.com/rust-lang/cc-rs" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#getrandom@0.4.1", + "name": "getrandom", + "version": "0.4.1", + "description": "A small cross-platform library for retrieving random data from system source", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/getrandom@0.4.1", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/getrandom" + }, + { + "type": "vcs", + "url": "https://github.com/rust-random/getrandom" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#hashbrown@0.16.1", + "name": "hashbrown", + "version": "0.16.1", + "description": "A Rust port of Google's SwissTable hash map", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/hashbrown@0.16.1", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/rust-lang/hashbrown" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#indexmap@2.13.0", + "name": "indexmap", + "version": "2.13.0", + "description": "A hash table with consistent order and fast iteration.", + "scope": "required", + "licenses": [ + { + "expression": "Apache-2.0 OR MIT" + } + ], + "purl": "pkg:cargo/indexmap@2.13.0", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/indexmap/" + }, + { + "type": "vcs", + "url": "https://github.com/indexmap-rs/indexmap" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#itoa@1.0.17", + "name": "itoa", + "version": "1.0.17", + "description": "Fast integer primitive to string conversion", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/itoa@1.0.17", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/itoa" + }, + { + "type": "vcs", + "url": "https://github.com/dtolnay/itoa" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#libc@0.2.180", + "name": "libc", + "version": "0.2.180", + "description": "Raw FFI bindings to platform libraries like libc.", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/libc@0.2.180", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/rust-lang/libc" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#linux-raw-sys@0.11.0", + "name": "linux-raw-sys", + "version": "0.11.0", + "description": "Generated bindings for Linux's userspace API", + "scope": "required", + "licenses": [ + { + "expression": "Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT" + } + ], + "purl": "pkg:cargo/linux-raw-sys@0.11.0", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/linux-raw-sys" + }, + { + "type": "vcs", + "url": "https://github.com/sunfishcode/linux-raw-sys" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#memchr@2.8.0", + "name": "memchr", + "version": "2.8.0", + "description": "Provides extremely fast (uses SIMD on x86_64, aarch64 and wasm32) routines for 1, 2 or 3 byte search and single substring search. ", + "scope": "required", + "licenses": [ + { + "expression": "Unlicense OR MIT" + } + ], + "purl": "pkg:cargo/memchr@2.8.0", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/memchr/" + }, + { + "type": "website", + "url": "https://github.com/BurntSushi/memchr" + }, + { + "type": "vcs", + "url": "https://github.com/BurntSushi/memchr" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#once_cell@1.21.3", + "name": "once_cell", + "version": "1.21.3", + "description": "Single assignment cells and lazy values.", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/once_cell@1.21.3", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/once_cell" + }, + { + "type": "vcs", + "url": "https://github.com/matklad/once_cell" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#proc-macro2@1.0.106", + "name": "proc-macro2", + "version": "1.0.106", + "description": "A substitute implementation of the compiler's `proc_macro` API to decouple token-based libraries from the procedural macro use case.", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/proc-macro2@1.0.106", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/proc-macro2" + }, + { + "type": "vcs", + "url": "https://github.com/dtolnay/proc-macro2" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#quote@1.0.44", + "name": "quote", + "version": "1.0.44", + "description": "Quasi-quoting macro quote!(...)", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/quote@1.0.44", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/quote/" + }, + { + "type": "vcs", + "url": "https://github.com/dtolnay/quote" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#rustix@1.1.3", + "name": "rustix", + "version": "1.1.3", + "description": "Safe Rust bindings to POSIX/Unix/Linux/Winsock-like syscalls", + "scope": "required", + "licenses": [ + { + "expression": "Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT" + } + ], + "purl": "pkg:cargo/rustix@1.1.3", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/rustix" + }, + { + "type": "vcs", + "url": "https://github.com/bytecodealliance/rustix" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#serde@1.0.228", + "name": "serde", + "version": "1.0.228", + "description": "A generic serialization/deserialization framework", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/serde@1.0.228", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/serde" + }, + { + "type": "website", + "url": "https://serde.rs" + }, + { + "type": "vcs", + "url": "https://github.com/serde-rs/serde" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#serde_core@1.0.228", + "name": "serde_core", + "version": "1.0.228", + "description": "Serde traits only, with no support for derive -- use the `serde` crate instead", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/serde_core@1.0.228", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/serde_core" + }, + { + "type": "website", + "url": "https://serde.rs" + }, + { + "type": "vcs", + "url": "https://github.com/serde-rs/serde" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#serde_derive@1.0.228", + "name": "serde_derive", + "version": "1.0.228", + "description": "Macros 1.1 implementation of #[derive(Serialize, Deserialize)]", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/serde_derive@1.0.228", + "externalReferences": [ + { + "type": "documentation", + "url": "https://serde.rs/derive.html" + }, + { + "type": "website", + "url": "https://serde.rs" + }, + { + "type": "vcs", + "url": "https://github.com/serde-rs/serde" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#serde_json@1.0.149", + "name": "serde_json", + "version": "1.0.149", + "description": "A JSON serialization file format", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/serde_json@1.0.149", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/serde_json" + }, + { + "type": "vcs", + "url": "https://github.com/serde-rs/json" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#serde_spanned@0.6.9", + "name": "serde_spanned", + "version": "0.6.9", + "description": "Serde-compatible spanned Value", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/serde_spanned@0.6.9", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/toml-rs/toml" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#shlex@1.3.0", + "name": "shlex", + "version": "1.3.0", + "description": "Split a string into shell words, like Python's shlex.", + "scope": "excluded", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/shlex@1.3.0", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/comex/rust-shlex" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#syn@2.0.117", + "name": "syn", + "version": "2.0.117", + "description": "Parser for Rust source code", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/syn@2.0.117", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/syn" + }, + { + "type": "vcs", + "url": "https://github.com/dtolnay/syn" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#tempfile@3.25.0", + "name": "tempfile", + "version": "3.25.0", + "description": "A library for managing temporary files and directories.", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/tempfile@3.25.0", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/tempfile" + }, + { + "type": "website", + "url": "https://stebalien.com/projects/tempfile-rs/" + }, + { + "type": "vcs", + "url": "https://github.com/Stebalien/tempfile" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#thiserror-impl@2.0.18", + "name": "thiserror-impl", + "version": "2.0.18", + "description": "Implementation detail of the `thiserror` crate", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/thiserror-impl@2.0.18", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/dtolnay/thiserror" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#thiserror@2.0.18", + "name": "thiserror", + "version": "2.0.18", + "description": "derive(Error)", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/thiserror@2.0.18", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/thiserror" + }, + { + "type": "vcs", + "url": "https://github.com/dtolnay/thiserror" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#toml@0.8.23", + "name": "toml", + "version": "0.8.23", + "description": "A native Rust encoder and decoder of TOML-formatted files and streams. Provides implementations of the standard Serialize/Deserialize traits for TOML data to facilitate deserializing and serializing Rust structures. ", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/toml@0.8.23", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/toml-rs/toml" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#toml_datetime@0.6.11", + "name": "toml_datetime", + "version": "0.6.11", + "description": "A TOML-compatible datetime type", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/toml_datetime@0.6.11", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/toml-rs/toml" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#toml_edit@0.22.27", + "name": "toml_edit", + "version": "0.22.27", + "description": "Yet another format-preserving TOML parser.", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/toml_edit@0.22.27", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/toml-rs/toml" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#toml_write@0.1.2", + "name": "toml_write", + "version": "0.1.2", + "description": "A low-level interface for writing out TOML ", + "scope": "required", + "licenses": [ + { + "expression": "MIT OR Apache-2.0" + } + ], + "purl": "pkg:cargo/toml_write@0.1.2", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/toml-rs/toml" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#unicode-ident@1.0.24", + "name": "unicode-ident", + "version": "1.0.24", + "description": "Determine whether characters have the XID_Start or XID_Continue properties according to Unicode Standard Annex #31", + "scope": "required", + "licenses": [ + { + "expression": "(MIT OR Apache-2.0) AND Unicode-3.0" + } + ], + "purl": "pkg:cargo/unicode-ident@1.0.24", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/unicode-ident" + }, + { + "type": "vcs", + "url": "https://github.com/dtolnay/unicode-ident" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#winnow@0.7.14", + "name": "winnow", + "version": "0.7.14", + "description": "A byte-oriented, zero-copy, parser combinators library", + "scope": "required", + "licenses": [ + { + "expression": "MIT" + } + ], + "purl": "pkg:cargo/winnow@0.7.14", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/winnow-rs/winnow" + } + ] + }, + { + "type": "library", + "bom-ref": "registry+https://github.com/rust-lang/crates.io-index#zmij@1.0.21", + "name": "zmij", + "version": "1.0.21", + "description": "A double-to-string conversion algorithm based on Schubfach and yy", + "scope": "required", + "licenses": [ + { + "expression": "MIT" + } + ], + "purl": "pkg:cargo/zmij@1.0.21", + "externalReferences": [ + { + "type": "documentation", + "url": "https://docs.rs/zmij" + }, + { + "type": "vcs", + "url": "https://github.com/dtolnay/zmij" + } + ] + } + ], + "dependencies": [ + { + "ref": "path+file:///home/lateuf/Projects/Karapace/crates/karapace-schema#0.1.0", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#blake3@1.8.3", + "registry+https://github.com/rust-lang/crates.io-index#serde@1.0.228", + "registry+https://github.com/rust-lang/crates.io-index#serde_json@1.0.149", + "registry+https://github.com/rust-lang/crates.io-index#tempfile@3.25.0", + "registry+https://github.com/rust-lang/crates.io-index#thiserror@2.0.18", + "registry+https://github.com/rust-lang/crates.io-index#toml@0.8.23" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#arrayref@0.3.9", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#arrayvec@0.7.6", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#bitflags@2.11.0", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#blake3@1.8.3", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#arrayref@0.3.9", + "registry+https://github.com/rust-lang/crates.io-index#arrayvec@0.7.6", + "registry+https://github.com/rust-lang/crates.io-index#cc@1.2.56", + "registry+https://github.com/rust-lang/crates.io-index#cfg-if@1.0.4", + "registry+https://github.com/rust-lang/crates.io-index#constant_time_eq@0.4.2", + "registry+https://github.com/rust-lang/crates.io-index#cpufeatures@0.2.17" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#cc@1.2.56", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#find-msvc-tools@0.1.9", + "registry+https://github.com/rust-lang/crates.io-index#shlex@1.3.0" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#cfg-if@1.0.4", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#constant_time_eq@0.4.2", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#cpufeatures@0.2.17", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#equivalent@1.0.2", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#errno@0.3.14", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#libc@0.2.180" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#fastrand@2.3.0", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#find-msvc-tools@0.1.9", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#getrandom@0.4.1", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#cfg-if@1.0.4", + "registry+https://github.com/rust-lang/crates.io-index#libc@0.2.180" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#hashbrown@0.16.1", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#indexmap@2.13.0", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#equivalent@1.0.2", + "registry+https://github.com/rust-lang/crates.io-index#hashbrown@0.16.1", + "registry+https://github.com/rust-lang/crates.io-index#serde_core@1.0.228" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#itoa@1.0.17", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#libc@0.2.180", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#linux-raw-sys@0.11.0", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#memchr@2.8.0", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#once_cell@1.21.3", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#proc-macro2@1.0.106", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#unicode-ident@1.0.24" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#quote@1.0.44", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#proc-macro2@1.0.106" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#rustix@1.1.3", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#bitflags@2.11.0", + "registry+https://github.com/rust-lang/crates.io-index#errno@0.3.14", + "registry+https://github.com/rust-lang/crates.io-index#libc@0.2.180", + "registry+https://github.com/rust-lang/crates.io-index#linux-raw-sys@0.11.0" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#serde@1.0.228", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#serde_core@1.0.228", + "registry+https://github.com/rust-lang/crates.io-index#serde_derive@1.0.228" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#serde_core@1.0.228", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#serde_derive@1.0.228", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#proc-macro2@1.0.106", + "registry+https://github.com/rust-lang/crates.io-index#quote@1.0.44", + "registry+https://github.com/rust-lang/crates.io-index#syn@2.0.117" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#serde_json@1.0.149", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#itoa@1.0.17", + "registry+https://github.com/rust-lang/crates.io-index#memchr@2.8.0", + "registry+https://github.com/rust-lang/crates.io-index#serde_core@1.0.228", + "registry+https://github.com/rust-lang/crates.io-index#zmij@1.0.21" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#serde_spanned@0.6.9", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#serde@1.0.228" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#shlex@1.3.0", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#syn@2.0.117", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#proc-macro2@1.0.106", + "registry+https://github.com/rust-lang/crates.io-index#quote@1.0.44", + "registry+https://github.com/rust-lang/crates.io-index#unicode-ident@1.0.24" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#tempfile@3.25.0", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#fastrand@2.3.0", + "registry+https://github.com/rust-lang/crates.io-index#getrandom@0.4.1", + "registry+https://github.com/rust-lang/crates.io-index#once_cell@1.21.3", + "registry+https://github.com/rust-lang/crates.io-index#rustix@1.1.3" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#thiserror-impl@2.0.18", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#proc-macro2@1.0.106", + "registry+https://github.com/rust-lang/crates.io-index#quote@1.0.44", + "registry+https://github.com/rust-lang/crates.io-index#syn@2.0.117" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#thiserror@2.0.18", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#thiserror-impl@2.0.18" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#toml@0.8.23", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#serde@1.0.228", + "registry+https://github.com/rust-lang/crates.io-index#serde_spanned@0.6.9", + "registry+https://github.com/rust-lang/crates.io-index#toml_datetime@0.6.11", + "registry+https://github.com/rust-lang/crates.io-index#toml_edit@0.22.27" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#toml_datetime@0.6.11", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#serde@1.0.228" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#toml_edit@0.22.27", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#indexmap@2.13.0", + "registry+https://github.com/rust-lang/crates.io-index#serde@1.0.228", + "registry+https://github.com/rust-lang/crates.io-index#serde_spanned@0.6.9", + "registry+https://github.com/rust-lang/crates.io-index#toml_datetime@0.6.11", + "registry+https://github.com/rust-lang/crates.io-index#toml_write@0.1.2", + "registry+https://github.com/rust-lang/crates.io-index#winnow@0.7.14" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#toml_write@0.1.2", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#unicode-ident@1.0.24", + "dependsOn": [] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#winnow@0.7.14", + "dependsOn": [ + "registry+https://github.com/rust-lang/crates.io-index#memchr@2.8.0" + ] + }, + { + "ref": "registry+https://github.com/rust-lang/crates.io-index#zmij@1.0.21", + "dependsOn": [] + } + ] +} \ No newline at end of file diff --git a/crates/karapace-schema/src/identity.rs b/crates/karapace-schema/src/identity.rs new file mode 100644 index 0000000..eec28e8 --- /dev/null +++ b/crates/karapace-schema/src/identity.rs @@ -0,0 +1,195 @@ +use crate::normalize::NormalizedManifest; +use crate::types::{EnvId, ShortId}; +use serde::Serialize; + +/// Deterministic identity for an environment, derived from its manifest content. +#[derive(Debug, Clone, Serialize, PartialEq, Eq)] +pub struct EnvIdentity { + pub env_id: EnvId, + pub short_id: ShortId, +} + +/// Compute a **preliminary** environment identity from unresolved manifest data. +/// +/// This is NOT the canonical identity. The canonical identity is computed by +/// [`LockFile::compute_identity()`] after dependency resolution, which uses: +/// - Actual base image content digest (not tag name hash) +/// - Resolved package versions (not just package names) +/// - Full hardware/mount/runtime policy +/// +/// This function is used only for: +/// - The `init` command (before resolution has occurred) +/// - Internal lookup during rebuild (to find old environments) +/// +/// After `build`, the env_id stored in metadata comes from the lock file. +pub fn compute_env_id(normalized: &NormalizedManifest) -> EnvIdentity { + let mut hasher = blake3::Hasher::new(); + + hasher.update(normalized.canonical_json().as_bytes()); + + let base_digest = blake3::hash(normalized.base_image.as_bytes()) + .to_hex() + .to_string(); + hasher.update(base_digest.as_bytes()); + + for pkg in &normalized.system_packages { + hasher.update(format!("pkg:{pkg}").as_bytes()); + } + for app in &normalized.gui_apps { + hasher.update(format!("app:{app}").as_bytes()); + } + + if normalized.hardware_gpu { + hasher.update(b"hw:gpu"); + } + if normalized.hardware_audio { + hasher.update(b"hw:audio"); + } + + for mount in &normalized.mounts { + hasher.update( + format!( + "mount:{}:{}:{}", + mount.label, mount.host_path, mount.container_path + ) + .as_bytes(), + ); + } + + hasher.update(format!("backend:{}", normalized.runtime_backend).as_bytes()); + + if normalized.network_isolation { + hasher.update(b"net:isolated"); + } + if let Some(cpu) = normalized.cpu_shares { + hasher.update(format!("cpu:{cpu}").as_bytes()); + } + if let Some(mem) = normalized.memory_limit_mb { + hasher.update(format!("mem:{mem}").as_bytes()); + } + + let hex = hasher.finalize().to_hex().to_string(); + let short = hex[..12].to_owned(); + + EnvIdentity { + env_id: EnvId::new(hex), + short_id: ShortId::new(short), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::manifest::parse_manifest_str; + + #[test] + fn stable_id_for_equivalent_manifests() { + let a = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[system] +packages = ["git", "clang"] +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + let b = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[system] +packages = ["clang", "git"] +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + assert_eq!(compute_env_id(&a), compute_env_id(&b)); + } + + #[test] + fn different_inputs_produce_different_ids() { + let a = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[system] +packages = ["git"] +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + let b = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[system] +packages = ["git", "cmake"] +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + assert_ne!(compute_env_id(&a), compute_env_id(&b)); + } + + #[test] + fn backend_change_changes_id() { + let a = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[runtime] +backend = "namespace" +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + let b = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[runtime] +backend = "oci" +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + assert_ne!(compute_env_id(&a), compute_env_id(&b)); + } + + #[test] + fn short_id_is_12_chars() { + let n = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + let id = compute_env_id(&n); + assert_eq!(id.short_id.as_str().len(), 12); + assert!(id.env_id.as_str().starts_with(id.short_id.as_str())); + } +} diff --git a/crates/karapace-schema/src/lib.rs b/crates/karapace-schema/src/lib.rs new file mode 100644 index 0000000..2db559e --- /dev/null +++ b/crates/karapace-schema/src/lib.rs @@ -0,0 +1,23 @@ +//! Manifest parsing, normalization, lock files, and environment identity for Karapace. +//! +//! This crate defines the schema layer: TOML manifest parsing (`ManifestV1`), +//! normalized representations (`NormalizedManifest`), deterministic environment +//! identity computation (`compute_env_id`), lock file generation/verification +//! (`LockFile`), and built-in preset definitions. + +pub mod identity; +pub mod lock; +pub mod manifest; +pub mod normalize; +pub mod preset; +pub mod types; + +pub use identity::{compute_env_id, EnvIdentity}; +pub use lock::{LockError, LockFile, ResolutionResult, ResolvedPackage}; +pub use manifest::{ + parse_manifest_file, parse_manifest_str, BaseSection, GuiSection, HardwareSection, + ManifestError, ManifestV1, MountsSection, ResourceLimits, RuntimeSection, SystemSection, +}; +pub use normalize::{NormalizedManifest, NormalizedMount}; +pub use preset::{get_preset, list_presets, Preset, BUILTIN_PRESETS}; +pub use types::{EnvId, LayerHash, ObjectHash, ShortId}; diff --git a/crates/karapace-schema/src/lock.rs b/crates/karapace-schema/src/lock.rs new file mode 100644 index 0000000..3e8cea6 --- /dev/null +++ b/crates/karapace-schema/src/lock.rs @@ -0,0 +1,874 @@ +use crate::identity::EnvIdentity; +use crate::manifest::ManifestError; +use crate::normalize::{NormalizedManifest, NormalizedMount}; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::path::Path; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum LockError { + #[error("manifest error: {0}")] + Manifest(#[from] ManifestError), + #[error("lock file I/O error: {0}")] + Io(#[from] std::io::Error), + #[error("lock file parse error: {0}")] + Parse(#[from] toml::de::Error), + #[error("lock file serialize error: {0}")] + Serialize(#[from] toml::ser::Error), + #[error("lock file env_id mismatch: lock has '{lock_id}', recomputed '{computed_id}'")] + EnvIdMismatch { + lock_id: String, + computed_id: String, + }, + #[error("lock file manifest drift: {0}")] + ManifestDrift(String), +} + +/// A resolved package with pinned version. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct ResolvedPackage { + pub name: String, + pub version: String, +} + +/// Result of dependency resolution against a base image. +#[derive(Debug, Clone)] +pub struct ResolutionResult { + /// Content hash (blake3) of the base image rootfs tarball. + pub base_image_digest: String, + /// Resolved packages with pinned versions. + pub resolved_packages: Vec, +} + +/// The lock file captures the fully resolved state of an environment. +/// +/// The env_id is computed deterministically from the locked fields, +/// not from unresolved manifest data. This guarantees: +/// same lockfile → same env_id → same environment. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct LockFile { + pub lock_version: u32, + pub env_id: String, + pub short_id: String, + + // Base image identity + pub base_image: String, + pub base_image_digest: String, + + // Resolved dependencies (version-pinned) + pub resolved_packages: Vec, + pub resolved_apps: Vec, + + // Runtime policy (included in hash contract) + pub runtime_backend: String, + pub hardware_gpu: bool, + pub hardware_audio: bool, + pub network_isolation: bool, + + // Mount policy + #[serde(default)] + pub mounts: Vec, + + // Resource limits + #[serde(default)] + pub cpu_shares: Option, + #[serde(default)] + pub memory_limit_mb: Option, +} + +impl LockFile { + /// Generate a lock file from a manifest and resolution results. + /// + /// The env_id is computed from the resolved state, ensuring that + /// identical resolved dependencies always produce the same identity. + pub fn from_resolved(normalized: &NormalizedManifest, resolution: &ResolutionResult) -> Self { + let mut resolved_packages = resolution.resolved_packages.clone(); + resolved_packages.sort(); + + let lock = LockFile { + lock_version: 2, + env_id: String::new(), // computed below + short_id: String::new(), + base_image: normalized.base_image.clone(), + base_image_digest: resolution.base_image_digest.clone(), + resolved_packages, + resolved_apps: normalized.gui_apps.clone(), + runtime_backend: normalized.runtime_backend.clone(), + hardware_gpu: normalized.hardware_gpu, + hardware_audio: normalized.hardware_audio, + network_isolation: normalized.network_isolation, + mounts: normalized.mounts.clone(), + cpu_shares: normalized.cpu_shares, + memory_limit_mb: normalized.memory_limit_mb, + }; + + let identity = lock.compute_identity(); + LockFile { + env_id: identity.env_id.into_inner(), + short_id: identity.short_id.into_inner(), + ..lock + } + } + + /// Compute the environment identity from the locked state. + /// + /// This is the canonical hash computation. It uses only resolved, + /// pinned data — never unresolved package names or image tags. + pub fn compute_identity(&self) -> EnvIdentity { + let mut hasher = blake3::Hasher::new(); + + // Base image: content digest, not tag name + hasher.update(format!("base_digest:{}", self.base_image_digest).as_bytes()); + + // Resolved packages: name@version (sorted) + for pkg in &self.resolved_packages { + hasher.update(format!("pkg:{}@{}", pkg.name, pkg.version).as_bytes()); + } + + // Apps (sorted by normalize) + for app in &self.resolved_apps { + hasher.update(format!("app:{app}").as_bytes()); + } + + // Hardware policy + if self.hardware_gpu { + hasher.update(b"hw:gpu"); + } + if self.hardware_audio { + hasher.update(b"hw:audio"); + } + + // Mount policy (sorted by label in normalize) + for mount in &self.mounts { + hasher.update( + format!( + "mount:{}:{}:{}", + mount.label, mount.host_path, mount.container_path + ) + .as_bytes(), + ); + } + + // Runtime backend + hasher.update(format!("backend:{}", self.runtime_backend).as_bytes()); + + // Network isolation + if self.network_isolation { + hasher.update(b"net:isolated"); + } + + // Resource limits + if let Some(cpu) = self.cpu_shares { + hasher.update(format!("cpu:{cpu}").as_bytes()); + } + if let Some(mem) = self.memory_limit_mb { + hasher.update(format!("mem:{mem}").as_bytes()); + } + + let hex = hasher.finalize().to_hex().to_string(); + let short = hex[..12].to_owned(); + + EnvIdentity { + env_id: crate::types::EnvId::new(hex), + short_id: crate::types::ShortId::new(short), + } + } + + /// Verify that this lock file is internally consistent + /// (stored env_id matches recomputed env_id). + pub fn verify_integrity(&self) -> Result { + let identity = self.compute_identity(); + if self.env_id != identity.env_id.as_str() { + return Err(LockError::EnvIdMismatch { + lock_id: self.env_id.clone(), + computed_id: identity.env_id.into_inner(), + }); + } + Ok(identity) + } + + /// Check that a manifest's declared intent matches this lock file. + /// + /// This catches cases where the manifest changed but the lock wasn't updated. + pub fn verify_manifest_intent(&self, normalized: &NormalizedManifest) -> Result<(), LockError> { + if self.base_image != normalized.base_image { + return Err(LockError::ManifestDrift(format!( + "base image changed: lock has '{}', manifest has '{}'", + self.base_image, normalized.base_image + ))); + } + if self.runtime_backend != normalized.runtime_backend { + return Err(LockError::ManifestDrift(format!( + "runtime backend changed: lock has '{}', manifest has '{}'", + self.runtime_backend, normalized.runtime_backend + ))); + } + + // Check that all declared packages are present in the lock + let locked_names: Vec<&str> = self + .resolved_packages + .iter() + .map(|p| p.name.as_str()) + .collect(); + for pkg in &normalized.system_packages { + if !locked_names.contains(&pkg.as_str()) { + return Err(LockError::ManifestDrift(format!( + "package '{pkg}' is in manifest but not in lock file. Run 'karapace build' to re-resolve." + ))); + } + } + + if self.hardware_gpu != normalized.hardware_gpu + || self.hardware_audio != normalized.hardware_audio + { + return Err(LockError::ManifestDrift( + "hardware policy changed. Run 'karapace build' to re-resolve.".to_owned(), + )); + } + + Ok(()) + } + + pub fn write_to_file(&self, path: impl AsRef) -> Result<(), LockError> { + let path = path.as_ref(); + let content = toml::to_string_pretty(self)?; + let dir = path.parent().unwrap_or(Path::new(".")); + let mut tmp = tempfile::NamedTempFile::new_in(dir)?; + std::io::Write::write_all(&mut tmp, content.as_bytes())?; + tmp.as_file().sync_all()?; + tmp.persist(path).map_err(|e| LockError::Io(e.error))?; + // Fsync parent directory to ensure rename durability on power loss. + if let Ok(f) = fs::File::open(dir) { + let _ = f.sync_all(); + } + Ok(()) + } + + pub fn read_from_file(path: impl AsRef) -> Result { + let content = fs::read_to_string(path)?; + Ok(toml::from_str(&content)?) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::manifest::parse_manifest_str; + + fn sample_normalized() -> NormalizedManifest { + parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[system] +packages = ["git", "clang"] +"#, + ) + .unwrap() + .normalize() + .unwrap() + } + + fn sample_resolution() -> ResolutionResult { + ResolutionResult { + base_image_digest: "a".repeat(64), + resolved_packages: vec![ + ResolvedPackage { + name: "clang".to_owned(), + version: "17.0.6-1".to_owned(), + }, + ResolvedPackage { + name: "git".to_owned(), + version: "2.44.0-1".to_owned(), + }, + ], + } + } + + #[test] + fn lock_roundtrip() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let lock = LockFile::from_resolved(&normalized, &resolution); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("karapace.lock"); + + lock.write_to_file(&path).unwrap(); + let loaded = LockFile::read_from_file(&path).unwrap(); + assert_eq!(lock, loaded); + } + + #[test] + fn lock_integrity_check_passes() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let lock = LockFile::from_resolved(&normalized, &resolution); + assert!(lock.verify_integrity().is_ok()); + } + + #[test] + fn lock_integrity_fails_on_tamper() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let mut lock = LockFile::from_resolved(&normalized, &resolution); + lock.env_id = "tampered".to_owned(); + assert!(lock.verify_integrity().is_err()); + } + + #[test] + fn lock_contains_real_digest() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let lock = LockFile::from_resolved(&normalized, &resolution); + // Digest is the actual image digest, not a hash of the tag name + assert_eq!(lock.base_image_digest, "a".repeat(64)); + assert_eq!(lock.base_image, "rolling"); + } + + #[test] + fn lock_contains_pinned_versions() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let lock = LockFile::from_resolved(&normalized, &resolution); + assert_eq!(lock.resolved_packages.len(), 2); + assert_eq!(lock.resolved_packages[0].name, "clang"); + assert_eq!(lock.resolved_packages[0].version, "17.0.6-1"); + assert_eq!(lock.resolved_packages[1].name, "git"); + assert_eq!(lock.resolved_packages[1].version, "2.44.0-1"); + } + + #[test] + fn same_resolution_same_identity() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let lock1 = LockFile::from_resolved(&normalized, &resolution); + let lock2 = LockFile::from_resolved(&normalized, &resolution); + assert_eq!(lock1.env_id, lock2.env_id); + } + + #[test] + fn different_versions_different_identity() { + let normalized = sample_normalized(); + let res1 = sample_resolution(); + let mut res2 = sample_resolution(); + res2.resolved_packages[1].version = "2.45.0-1".to_owned(); + + let lock1 = LockFile::from_resolved(&normalized, &res1); + let lock2 = LockFile::from_resolved(&normalized, &res2); + assert_ne!(lock1.env_id, lock2.env_id); + } + + #[test] + fn different_image_digest_different_identity() { + let normalized = sample_normalized(); + let mut res1 = sample_resolution(); + let mut res2 = sample_resolution(); + res1.base_image_digest = "a".repeat(64); + res2.base_image_digest = "b".repeat(64); + + let lock1 = LockFile::from_resolved(&normalized, &res1); + let lock2 = LockFile::from_resolved(&normalized, &res2); + assert_ne!(lock1.env_id, lock2.env_id); + } + + #[test] + fn manifest_intent_verified() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let lock = LockFile::from_resolved(&normalized, &resolution); + assert!(lock.verify_manifest_intent(&normalized).is_ok()); + } + + #[test] + fn manifest_drift_detected() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let lock = LockFile::from_resolved(&normalized, &resolution); + + // Change the manifest + let mut drifted = normalized.clone(); + drifted.base_image = "ubuntu/24.04".to_owned(); + assert!(lock.verify_manifest_intent(&drifted).is_err()); + } + + #[test] + fn includes_hardware_policy_in_identity() { + let mut n1 = sample_normalized(); + let mut n2 = sample_normalized(); + n1.hardware_gpu = false; + n2.hardware_gpu = true; + let res = sample_resolution(); + let lock1 = LockFile::from_resolved(&n1, &res); + let lock2 = LockFile::from_resolved(&n2, &res); + assert_ne!(lock1.env_id, lock2.env_id); + } + + // --- A1: Determinism Hardening --- + + #[test] + fn hash_stable_across_repeated_invocations() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let mut ids = Vec::new(); + for _ in 0..100 { + let lock = LockFile::from_resolved(&normalized, &resolution); + ids.push(lock.env_id.clone()); + } + let first = &ids[0]; + for (i, id) in ids.iter().enumerate() { + assert_eq!(first, id, "invocation {i} produced different env_id"); + } + } + + #[test] + fn hash_stable_with_randomized_package_order() { + let normalized = sample_normalized(); + // Create resolutions with packages in different orders + let res_ab = ResolutionResult { + base_image_digest: "a".repeat(64), + resolved_packages: vec![ + ResolvedPackage { + name: "alpha".to_owned(), + version: "1.0".to_owned(), + }, + ResolvedPackage { + name: "beta".to_owned(), + version: "2.0".to_owned(), + }, + ResolvedPackage { + name: "gamma".to_owned(), + version: "3.0".to_owned(), + }, + ], + }; + let res_ba = ResolutionResult { + base_image_digest: "a".repeat(64), + resolved_packages: vec![ + ResolvedPackage { + name: "gamma".to_owned(), + version: "3.0".to_owned(), + }, + ResolvedPackage { + name: "alpha".to_owned(), + version: "1.0".to_owned(), + }, + ResolvedPackage { + name: "beta".to_owned(), + version: "2.0".to_owned(), + }, + ], + }; + let lock_ab = LockFile::from_resolved(&normalized, &res_ab); + let lock_ba = LockFile::from_resolved(&normalized, &res_ba); + assert_eq!( + lock_ab.env_id, lock_ba.env_id, + "package order must not affect env_id (sorted in from_resolved)" + ); + } + + #[test] + fn hash_stable_with_randomized_mount_order() { + use crate::normalize::NormalizedMount; + let mut n1 = sample_normalized(); + n1.mounts = vec![ + NormalizedMount { + label: "cache".to_owned(), + host_path: "/a".to_owned(), + container_path: "/b".to_owned(), + }, + NormalizedMount { + label: "work".to_owned(), + host_path: "/c".to_owned(), + container_path: "/d".to_owned(), + }, + ]; + let mut n2 = sample_normalized(); + n2.mounts = vec![ + NormalizedMount { + label: "work".to_owned(), + host_path: "/c".to_owned(), + container_path: "/d".to_owned(), + }, + NormalizedMount { + label: "cache".to_owned(), + host_path: "/a".to_owned(), + container_path: "/b".to_owned(), + }, + ]; + // Mounts are sorted by label in normalize(), but from_resolved doesn't re-sort. + // The hash input iterates mounts in order. For determinism, mounts must be + // pre-sorted by the caller (normalize). Test that identical sorted mounts hash equally. + n1.mounts.sort_by(|a, b| a.label.cmp(&b.label)); + n2.mounts.sort_by(|a, b| a.label.cmp(&b.label)); + let res = sample_resolution(); + let lock1 = LockFile::from_resolved(&n1, &res); + let lock2 = LockFile::from_resolved(&n2, &res); + assert_eq!(lock1.env_id, lock2.env_id); + } + + #[test] + fn cross_platform_path_normalization() { + // Verify that path separators in mount specs don't break determinism. + // On all platforms, mount paths are stored as-is from the manifest + // (which uses forward slashes). This test confirms no OS-dependent + // path mangling occurs. + use crate::normalize::NormalizedMount; + let mut n1 = sample_normalized(); + n1.mounts = vec![NormalizedMount { + label: "src".to_owned(), + host_path: "/home/user/src".to_owned(), + container_path: "/workspace".to_owned(), + }]; + let res = sample_resolution(); + let lock = LockFile::from_resolved(&n1, &res); + + // The env_id must be a fixed known value regardless of platform + let lock2 = LockFile::from_resolved(&n1, &res); + assert_eq!(lock.env_id, lock2.env_id); + // env_id must be exactly 64 hex chars + assert_eq!(lock.env_id.len(), 64); + assert!(lock.env_id.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn identical_inputs_produce_identical_hash_bytes() { + let normalized = sample_normalized(); + let resolution = sample_resolution(); + let lock1 = LockFile::from_resolved(&normalized, &resolution); + let lock2 = LockFile::from_resolved(&normalized, &resolution); + // Byte-level comparison of the full 64-char hex string + assert_eq!( + lock1.env_id.as_bytes(), + lock2.env_id.as_bytes(), + "hash bytes must be identical for identical inputs" + ); + assert_eq!(lock1.short_id.as_bytes(), lock2.short_id.as_bytes(),); + } + + // --- IG-M5: Golden-value cross-machine determinism tests --- + // + // These tests hardcode expected blake3 hashes for fixed inputs. + // If any of these fail, it means compute_identity() has changed behavior, + // which would break cross-machine reproducibility and existing lock files. + // The golden values were computed once and must remain stable forever. + + fn golden_lock( + base_digest: &str, + packages: &[(&str, &str)], + mounts: &[(&str, &str, &str)], + backend: &str, + gpu: bool, + audio: bool, + network_isolation: bool, + ) -> LockFile { + let resolved_packages: Vec = packages + .iter() + .map(|(n, v)| ResolvedPackage { + name: n.to_string(), + version: v.to_string(), + }) + .collect(); + let mount_specs: Vec = mounts + .iter() + .map(|(l, h, c)| NormalizedMount { + label: l.to_string(), + host_path: h.to_string(), + container_path: c.to_string(), + }) + .collect(); + let normalized = NormalizedManifest { + manifest_version: 1, + base_image: "rolling".to_owned(), + system_packages: packages.iter().map(|(n, _)| n.to_string()).collect(), + gui_apps: Vec::new(), + hardware_gpu: gpu, + hardware_audio: audio, + mounts: mount_specs, + runtime_backend: backend.to_owned(), + network_isolation, + cpu_shares: None, + memory_limit_mb: None, + }; + let resolution = ResolutionResult { + base_image_digest: base_digest.to_owned(), + resolved_packages, + }; + LockFile::from_resolved(&normalized, &resolution) + } + + #[test] + fn golden_identity_empty_manifest() { + let lock = golden_lock("sha256:abc123", &[], &[], "mock", false, false, false); + assert_eq!( + lock.env_id, "aabaeaeda3b27db42054f64719a16afd49e72b4fc6e8493e2fce9d862d240806", + "golden hash for empty manifest must be stable across all platforms" + ); + } + + #[test] + fn golden_identity_with_packages() { + let lock = golden_lock( + "sha256:abc123", + &[("curl", "7.88.1"), ("git", "2.39.2")], + &[], + "namespace", + false, + false, + false, + ); + assert_eq!( + lock.env_id, "dfea3163e5925ee788a97fae24d9ec08f774c29c64c9180befe771d877e62f18", + "golden hash for manifest with packages must be stable across all platforms" + ); + } + + #[test] + fn golden_identity_with_mounts_and_hardware() { + let lock = golden_lock( + "sha256:abc123", + &[("vim", "9.0.1")], + &[("home", "/home/user", "/home")], + "namespace", + true, + true, + false, + ); + assert_eq!( + lock.env_id, "d6ca89829da264240d0508bd58bffc28c2014f643426bbecff3db5a525793546", + "golden hash for manifest with mounts+hardware must be stable across all platforms" + ); + } + + #[test] + fn golden_identity_network_isolation_differs() { + let lock = golden_lock("sha256:abc123", &[], &[], "mock", false, false, true); + assert_eq!( + lock.env_id, "dcdae57b3749d0aa2d3948de9fde99ceedad34deaef9b618c2d9f939dac25596", + "golden hash for network-isolated manifest must be stable across all platforms" + ); + // Must differ from the non-isolated empty manifest + assert_ne!( + lock.env_id, "aabaeaeda3b27db42054f64719a16afd49e72b4fc6e8493e2fce9d862d240806", + "network isolation must produce a different hash" + ); + } + + #[allow(clippy::too_many_arguments)] + fn golden_lock_full( + base_digest: &str, + packages: &[(&str, &str)], + mounts: &[(&str, &str, &str)], + apps: &[&str], + backend: &str, + gpu: bool, + audio: bool, + network_isolation: bool, + cpu_shares: Option, + memory_limit_mb: Option, + ) -> LockFile { + let resolved_packages: Vec = packages + .iter() + .map(|(n, v)| ResolvedPackage { + name: n.to_string(), + version: v.to_string(), + }) + .collect(); + let mount_specs: Vec = mounts + .iter() + .map(|(l, h, c)| NormalizedMount { + label: l.to_string(), + host_path: h.to_string(), + container_path: c.to_string(), + }) + .collect(); + let normalized = NormalizedManifest { + manifest_version: 1, + base_image: "rolling".to_owned(), + system_packages: packages.iter().map(|(n, _)| n.to_string()).collect(), + gui_apps: apps.iter().map(ToString::to_string).collect(), + hardware_gpu: gpu, + hardware_audio: audio, + mounts: mount_specs, + runtime_backend: backend.to_owned(), + network_isolation, + cpu_shares, + memory_limit_mb, + }; + let resolution = ResolutionResult { + base_image_digest: base_digest.to_owned(), + resolved_packages, + }; + LockFile::from_resolved(&normalized, &resolution) + } + + #[test] + fn golden_identity_with_cpu_shares() { + let lock = golden_lock_full( + "sha256:abc123", + &[], + &[], + &[], + "mock", + false, + false, + false, + Some(1024), + None, + ); + assert_eq!( + lock.env_id, "d966f9ee1c5e8959ae29d0483c45fc66813ec47201aa9f26c6371336b3dfd252", + "golden hash for cpu_shares=1024 must be stable across all platforms" + ); + } + + #[test] + fn golden_identity_with_memory_limit() { + let lock = golden_lock_full( + "sha256:abc123", + &[], + &[], + &[], + "mock", + false, + false, + false, + None, + Some(4096), + ); + assert_eq!( + lock.env_id, "74823889e305b7b28394508b5813568faf9c814b4ef8f1f97e8d3dcd9a7a6bae", + "golden hash for memory_limit_mb=4096 must be stable across all platforms" + ); + } + + #[test] + fn golden_identity_with_apps() { + let lock = golden_lock_full( + "sha256:abc123", + &[], + &[], + &["firefox", "code"], + "mock", + false, + false, + false, + None, + None, + ); + assert_eq!( + lock.env_id, "1aaf066c7b1e18178e838b0cf33c0bc67cd7401e586df826daa9033178ccfdf3", + "golden hash for gui_apps=[firefox,code] must be stable across all platforms" + ); + } + + #[test] + fn golden_identity_with_cpu_and_memory() { + let lock = golden_lock_full( + "sha256:abc123", + &[("curl", "7.88.1")], + &[("data", "/mnt/data", "/data")], + &["vlc"], + "namespace", + true, + true, + true, + Some(2048), + Some(8192), + ); + assert_eq!( + lock.env_id, "44f9547036b4f24f8fe32844f2672804020c6260e29b7f72e17fd29d441ebc27", + "golden hash for fully-populated manifest must be stable across all platforms" + ); + } + + #[test] + fn golden_identity_gpu_only_differs_from_audio_only() { + let gpu_lock = golden_lock_full( + "sha256:abc123", + &[], + &[], + &[], + "mock", + true, + false, + false, + None, + None, + ); + let audio_lock = golden_lock_full( + "sha256:abc123", + &[], + &[], + &[], + "mock", + false, + true, + false, + None, + None, + ); + assert_eq!( + gpu_lock.env_id, "f761765ba48777bcc64c2cd5169cb44be27bcd2d6587c64c28bc98fa0964b266", + "golden hash for gpu-only must be stable" + ); + assert_eq!( + audio_lock.env_id, "428d91b41a03c1625e01bab1278ef231fb186833bff80a6bdc8227a2276f4318", + "golden hash for audio-only must be stable" + ); + assert_ne!( + gpu_lock.env_id, audio_lock.env_id, + "gpu-only and audio-only must produce different hashes" + ); + } + + #[test] + fn hash_sensitive_to_all_fields() { + let base_norm = sample_normalized(); + let base_res = sample_resolution(); + let base_id = LockFile::from_resolved(&base_norm, &base_res).env_id; + + // Change each field and verify the hash changes + let mut n = base_norm.clone(); + n.network_isolation = !n.network_isolation; + assert_ne!( + LockFile::from_resolved(&n, &base_res).env_id, + base_id, + "network_isolation" + ); + + let mut n = base_norm.clone(); + n.cpu_shares = Some(1024); + assert_ne!( + LockFile::from_resolved(&n, &base_res).env_id, + base_id, + "cpu_shares" + ); + + let mut n = base_norm.clone(); + n.memory_limit_mb = Some(4096); + assert_ne!( + LockFile::from_resolved(&n, &base_res).env_id, + base_id, + "memory_limit_mb" + ); + + let mut n = base_norm.clone(); + n.runtime_backend = "oci".to_owned(); + assert_ne!( + LockFile::from_resolved(&n, &base_res).env_id, + base_id, + "runtime_backend" + ); + + let mut n = base_norm.clone(); + n.gui_apps = vec!["new-app".to_owned()]; + assert_ne!( + LockFile::from_resolved(&n, &base_res).env_id, + base_id, + "gui_apps" + ); + } +} diff --git a/crates/karapace-schema/src/manifest.rs b/crates/karapace-schema/src/manifest.rs new file mode 100644 index 0000000..288bc89 --- /dev/null +++ b/crates/karapace-schema/src/manifest.rs @@ -0,0 +1,192 @@ +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use std::fs; +use std::path::Path; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ManifestError { + #[error("failed to read manifest file: {0}")] + Io(#[from] std::io::Error), + #[error("failed to parse manifest: {0}")] + ParseToml(#[from] toml::de::Error), + #[error("unsupported manifest_version: {0}, expected 1")] + UnsupportedVersion(u32), + #[error("base.image must not be empty")] + EmptyBaseImage, + #[error("mount label must not be empty")] + EmptyMountLabel, + #[error("invalid mount declaration for '{label}': '{spec}', expected ':'")] + InvalidMount { label: String, spec: String }, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct ManifestV1 { + pub manifest_version: u32, + pub base: BaseSection, + #[serde(default)] + pub system: SystemSection, + #[serde(default)] + pub gui: GuiSection, + #[serde(default)] + pub hardware: HardwareSection, + #[serde(default)] + pub mounts: MountsSection, + #[serde(default)] + pub runtime: RuntimeSection, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct BaseSection { + pub image: String, +} + +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct SystemSection { + #[serde(default)] + pub packages: Vec, +} + +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct GuiSection { + #[serde(default)] + pub apps: Vec, +} + +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct HardwareSection { + #[serde(default)] + pub gpu: bool, + #[serde(default)] + pub audio: bool, +} + +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)] +pub struct MountsSection { + #[serde(flatten)] + pub entries: BTreeMap, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct RuntimeSection { + #[serde(default = "default_backend")] + pub backend: String, + #[serde(default)] + pub network_isolation: bool, + #[serde(default)] + pub resource_limits: ResourceLimits, +} + +impl Default for RuntimeSection { + fn default() -> Self { + Self { + backend: default_backend(), + network_isolation: false, + resource_limits: ResourceLimits::default(), + } + } +} + +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct ResourceLimits { + #[serde(default)] + pub cpu_shares: Option, + #[serde(default)] + pub memory_limit_mb: Option, +} + +fn default_backend() -> String { + "namespace".to_owned() +} + +pub fn parse_manifest_str(input: &str) -> Result { + Ok(toml::from_str(input)?) +} + +pub fn parse_manifest_file(path: impl AsRef) -> Result { + let content = fs::read_to_string(path)?; + parse_manifest_str(&content) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_full_manifest() { + let input = r#" +manifest_version = 1 + +[base] +image = "rolling" + +[system] +packages = ["clang", "cmake", "git"] + +[gui] +apps = ["ide", "debugger"] + +[hardware] +gpu = true +audio = true + +[mounts] +workspace = "./:/workspace" + +[runtime] +backend = "oci" +network_isolation = true + +[runtime.resource_limits] +cpu_shares = 1024 +memory_limit_mb = 4096 +"#; + let manifest = parse_manifest_str(input).expect("should parse"); + assert_eq!(manifest.manifest_version, 1); + assert_eq!(manifest.base.image, "rolling"); + assert_eq!(manifest.system.packages.len(), 3); + assert_eq!(manifest.runtime.backend, "oci"); + assert!(manifest.runtime.network_isolation); + assert_eq!(manifest.runtime.resource_limits.cpu_shares, Some(1024)); + } + + #[test] + fn parses_minimal_manifest() { + let input = r#" +manifest_version = 1 + +[base] +image = "rolling" +"#; + let manifest = parse_manifest_str(input).expect("should parse"); + assert_eq!(manifest.runtime.backend, "namespace"); + assert!(!manifest.runtime.network_isolation); + } + + #[test] + fn rejects_unknown_fields() { + let input = r#" +manifest_version = 1 + +[base] +image = "rolling" +unknown_field = true +"#; + assert!(parse_manifest_str(input).is_err()); + } + + #[test] + fn rejects_missing_base() { + let input = r" +manifest_version = 1 +"; + assert!(parse_manifest_str(input).is_err()); + } +} diff --git a/crates/karapace-schema/src/normalize.rs b/crates/karapace-schema/src/normalize.rs new file mode 100644 index 0000000..3de6aa1 --- /dev/null +++ b/crates/karapace-schema/src/normalize.rs @@ -0,0 +1,224 @@ +use crate::manifest::{ManifestError, ManifestV1}; +use serde::{Deserialize, Serialize}; + +/// Canonical, sorted, deduplicated representation of a parsed manifest. +/// +/// All optional fields are resolved to defaults, packages are sorted, and mounts +/// are validated. This is the input to identity hashing and lock file generation. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct NormalizedManifest { + pub manifest_version: u32, + pub base_image: String, + pub system_packages: Vec, + pub gui_apps: Vec, + pub hardware_gpu: bool, + pub hardware_audio: bool, + pub mounts: Vec, + pub runtime_backend: String, + pub network_isolation: bool, + pub cpu_shares: Option, + pub memory_limit_mb: Option, +} + +/// A validated bind-mount specification with label, host path, and container path. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct NormalizedMount { + pub label: String, + pub host_path: String, + pub container_path: String, +} + +impl ManifestV1 { + /// Normalize the manifest: validate fields, sort packages, resolve defaults. + pub fn normalize(&self) -> Result { + if self.manifest_version != 1 { + return Err(ManifestError::UnsupportedVersion(self.manifest_version)); + } + + let base_image = self.base.image.trim().to_owned(); + if base_image.is_empty() { + return Err(ManifestError::EmptyBaseImage); + } + + let mut mounts = Vec::with_capacity(self.mounts.entries.len()); + for (label, spec) in &self.mounts.entries { + let trimmed_label = label.trim().to_owned(); + if trimmed_label.is_empty() { + return Err(ManifestError::EmptyMountLabel); + } + let (host_path, container_path) = parse_mount_spec(label, spec)?; + mounts.push(NormalizedMount { + label: trimmed_label, + host_path, + container_path, + }); + } + mounts.sort_by(|a, b| a.label.cmp(&b.label)); + + let runtime_backend = self.runtime.backend.trim().to_lowercase(); + + Ok(NormalizedManifest { + manifest_version: self.manifest_version, + base_image, + system_packages: normalize_string_list(&self.system.packages), + gui_apps: normalize_string_list(&self.gui.apps), + hardware_gpu: self.hardware.gpu, + hardware_audio: self.hardware.audio, + mounts, + runtime_backend, + network_isolation: self.runtime.network_isolation, + cpu_shares: self.runtime.resource_limits.cpu_shares, + memory_limit_mb: self.runtime.resource_limits.memory_limit_mb, + }) + } +} + +impl NormalizedManifest { + pub fn canonical_json(&self) -> String { + serde_json::to_string(self).expect("normalized manifest serialization is infallible") + } +} + +fn parse_mount_spec(label: &str, spec: &str) -> Result<(String, String), ManifestError> { + let Some((host_raw, container_raw)) = spec.split_once(':') else { + return Err(ManifestError::InvalidMount { + label: label.to_owned(), + spec: spec.to_owned(), + }); + }; + + let host_path = host_raw.trim().to_owned(); + let container_path = container_raw.trim().to_owned(); + + if host_path.is_empty() || container_path.is_empty() { + return Err(ManifestError::InvalidMount { + label: label.to_owned(), + spec: spec.to_owned(), + }); + } + + Ok((host_path, container_path)) +} + +fn normalize_string_list(values: &[String]) -> Vec { + let mut out: Vec = values + .iter() + .map(|v| v.trim().to_owned()) + .filter(|v| !v.is_empty()) + .collect(); + out.sort(); + out.dedup(); + out +} + +#[cfg(test)] +mod tests { + use crate::manifest::parse_manifest_str; + + #[test] + fn normalizes_and_sorts_deterministically() { + let input = r#" +manifest_version = 1 + +[base] +image = "rolling" + +[system] +packages = ["git", "cmake", "git", "clang"] + +[gui] +apps = ["debugger", "ide"] + +[hardware] +gpu = true +audio = false + +[mounts] +workspace = "./:/workspace" +cache = "~/.cache:/cache" +"#; + let manifest = parse_manifest_str(input).unwrap(); + let normalized = manifest.normalize().unwrap(); + + assert_eq!(normalized.system_packages, vec!["clang", "cmake", "git"]); + assert_eq!(normalized.gui_apps, vec!["debugger", "ide"]); + assert_eq!(normalized.mounts[0].label, "cache"); + assert_eq!(normalized.mounts[1].label, "workspace"); + assert_eq!(normalized.runtime_backend, "namespace"); + } + + #[test] + fn equivalent_manifests_produce_same_canonical_json() { + let a = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[system] +packages = ["git", "clang"] +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + let b = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[system] +packages = ["clang", "git"] +"#, + ) + .unwrap() + .normalize() + .unwrap(); + + assert_eq!(a.canonical_json(), b.canonical_json()); + } + + #[test] + fn rejects_empty_base_image() { + let manifest = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = " " +"#, + ) + .unwrap(); + assert!(manifest.normalize().is_err()); + } + + #[test] + fn rejects_invalid_mount_spec() { + let manifest = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[mounts] +workspace = "./no-colon" +"#, + ) + .unwrap(); + assert!(manifest.normalize().is_err()); + } + + #[test] + fn runtime_backend_included_in_normalization() { + let manifest = parse_manifest_str( + r#" +manifest_version = 1 +[base] +image = "rolling" +[runtime] +backend = "OCI" +"#, + ) + .unwrap(); + let normalized = manifest.normalize().unwrap(); + assert_eq!(normalized.runtime_backend, "oci"); + } +} diff --git a/crates/karapace-schema/src/preset.rs b/crates/karapace-schema/src/preset.rs new file mode 100644 index 0000000..c99a613 --- /dev/null +++ b/crates/karapace-schema/src/preset.rs @@ -0,0 +1,143 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct Preset { + pub name: &'static str, + pub description: &'static str, + pub manifest: &'static str, +} + +pub const BUILTIN_PRESETS: &[Preset] = &[ + Preset { + name: "dev", + description: "Development environment with common build tools", + manifest: r#"manifest_version = 1 + +[base] +image = "rolling" + +[system] +packages = ["git", "curl", "wget", "vim", "gcc", "make", "cmake"] + +[runtime] +backend = "namespace" +"#, + }, + Preset { + name: "dev-rust", + description: "Rust development environment", + manifest: r#"manifest_version = 1 + +[base] +image = "rolling" + +[system] +packages = ["git", "curl", "gcc", "make", "rustup"] + +[runtime] +backend = "namespace" +"#, + }, + Preset { + name: "dev-python", + description: "Python development environment", + manifest: r#"manifest_version = 1 + +[base] +image = "rolling" + +[system] +packages = ["git", "curl", "python3", "python3-pip", "python3-venv"] + +[runtime] +backend = "namespace" +"#, + }, + Preset { + name: "gui-app", + description: "GUI application environment with GPU and audio passthrough", + manifest: r#"manifest_version = 1 + +[base] +image = "rolling" + +[hardware] +gpu = true +audio = true + +[runtime] +backend = "namespace" +"#, + }, + Preset { + name: "gaming", + description: "Gaming environment with GPU, audio, and Vulkan support", + manifest: r#"manifest_version = 1 + +[base] +image = "rolling" + +[system] +packages = ["mesa-dri", "vulkan-loader", "libvulkan1", "alsa-plugins"] + +[hardware] +gpu = true +audio = true + +[runtime] +backend = "namespace" +"#, + }, + Preset { + name: "minimal", + description: "Minimal environment with no extra packages", + manifest: r#"manifest_version = 1 + +[base] +image = "rolling" + +[runtime] +backend = "namespace" +"#, + }, +]; + +pub fn get_preset(name: &str) -> Option<&'static Preset> { + BUILTIN_PRESETS.iter().find(|p| p.name == name) +} + +pub fn list_presets() -> &'static [Preset] { + BUILTIN_PRESETS +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn all_presets_parse() { + for preset in BUILTIN_PRESETS { + let result = crate::parse_manifest_str(preset.manifest); + assert!( + result.is_ok(), + "preset '{}' failed to parse: {:?}", + preset.name, + result.err() + ); + } + } + + #[test] + fn get_preset_by_name() { + assert!(get_preset("dev").is_some()); + assert!(get_preset("nonexistent").is_none()); + } + + #[test] + fn all_presets_have_unique_names() { + let mut names: Vec<&str> = BUILTIN_PRESETS.iter().map(|p| p.name).collect(); + names.sort_unstable(); + names.dedup(); + assert_eq!(names.len(), BUILTIN_PRESETS.len()); + } +} diff --git a/crates/karapace-schema/src/types.rs b/crates/karapace-schema/src/types.rs new file mode 100644 index 0000000..be5506b --- /dev/null +++ b/crates/karapace-schema/src/types.rs @@ -0,0 +1,158 @@ +//! Newtype wrappers for string identifiers, providing compile-time type safety. +//! +//! All newtypes serialize/deserialize as plain strings for backward compatibility. + +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::ops::Deref; + +macro_rules! string_newtype { + ($(#[$meta:meta])* $name:ident) => { + $(#[$meta])* + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] + #[serde(transparent)] + pub struct $name(String); + + impl $name { + /// Create a new instance from a string. + pub fn new(s: impl Into) -> Self { + Self(s.into()) + } + + /// Return the inner string as a slice. + pub fn as_str(&self) -> &str { + &self.0 + } + + /// Consume self and return the inner `String`. + pub fn into_inner(self) -> String { + self.0 + } + } + + impl Deref for $name { + type Target = str; + fn deref(&self) -> &str { + &self.0 + } + } + + impl fmt::Display for $name { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } + } + + impl AsRef for $name { + fn as_ref(&self) -> &str { + &self.0 + } + } + + impl PartialEq for $name { + fn eq(&self, other: &str) -> bool { + self.0 == other + } + } + + impl PartialEq for $name { + fn eq(&self, other: &String) -> bool { + self.0 == *other + } + } + + impl PartialEq<$name> for String { + fn eq(&self, other: &$name) -> bool { + *self == other.0 + } + } + + impl AsRef for $name { + fn as_ref(&self) -> &std::path::Path { + std::path::Path::new(&self.0) + } + } + + impl From for $name { + fn from(s: String) -> Self { + Self(s) + } + } + + impl From<&str> for $name { + fn from(s: &str) -> Self { + Self(s.to_owned()) + } + } + }; +} + +string_newtype!( + /// Full 64-character hex environment identifier, derived from locked manifest content. + EnvId +); + +string_newtype!( + /// Truncated 12-character prefix of an [`EnvId`], used for display. + ShortId +); + +string_newtype!( + /// Blake3 hash of a content-addressable object in the store. + ObjectHash +); + +string_newtype!( + /// Blake3 hash identifying a layer manifest. + LayerHash +); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn env_id_display_and_as_ref() { + let id = EnvId::new("abc123"); + assert_eq!(id.to_string(), "abc123"); + assert_eq!(id.as_str(), "abc123"); + assert_eq!(AsRef::::as_ref(&id), "abc123"); + } + + #[test] + fn env_id_serde_roundtrip() { + let id = EnvId::new("deadbeef"); + let json = serde_json::to_string(&id).unwrap(); + assert_eq!(json, "\"deadbeef\""); + let back: EnvId = serde_json::from_str(&json).unwrap(); + assert_eq!(back, id); + } + + #[test] + fn short_id_from_str() { + let sid = ShortId::from("abc123def456"); + assert_eq!(sid.as_str(), "abc123def456"); + } + + #[test] + fn object_hash_into_inner() { + let h = ObjectHash::new("hash_value".to_owned()); + assert_eq!(h.into_inner(), "hash_value"); + } + + #[test] + fn layer_hash_equality() { + let a = LayerHash::new("same"); + let b = LayerHash::new("same"); + let c = LayerHash::new("diff"); + assert_eq!(a, b); + assert_ne!(a, c); + } + + #[test] + fn env_id_from_string() { + let s = String::from("test_id"); + let id: EnvId = s.into(); + assert_eq!(id.as_str(), "test_id"); + } +}