feat: karapace-schema — manifest v1, normalization, identity hashing, lock file v2

- TOML manifest parsing with strict schema validation (deny_unknown_fields)
- Deterministic normalization: sorted packages, deduplication, canonical JSON
- Two-phase identity: preliminary (from manifest) and canonical (from lock)
- Lock file v2: resolved packages with pinned versions, base image content digest
- Dual lock verification: integrity (hash) and manifest intent (drift detection)
- Built-in presets: dev, dev-rust, dev-python, gui-app, gaming, minimal
- Blake3 256-bit hashing throughout
This commit is contained in:
Marco Allegretti 2026-02-22 18:36:15 +01:00
parent 78d40c0d0a
commit cdd13755a0
9 changed files with 3056 additions and 0 deletions

View file

@ -0,0 +1,18 @@
[package]
name = "karapace-schema"
description = "Manifest parsing, normalization, identity hashing, and lock file for Karapace"
version.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
[lints]
workspace = true
[dependencies]
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
toml.workspace = true
blake3.workspace = true
tempfile.workspace = true

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,195 @@
use crate::normalize::NormalizedManifest;
use crate::types::{EnvId, ShortId};
use serde::Serialize;
/// Deterministic identity for an environment, derived from its manifest content.
///
/// Both identifiers come from the same hex-encoded hash: `short_id` is a
/// prefix of `env_id`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct EnvIdentity {
/// Full identifier: the complete hex-encoded hash.
pub env_id: EnvId,
/// Abbreviated identifier: the first 12 characters of `env_id`.
pub short_id: ShortId,
}
/// Derive a **preliminary** environment identity from unresolved manifest data.
///
/// The result is NOT the canonical identity. The canonical one comes from
/// [`crate::lock::LockFile::compute_identity`] once dependencies are resolved,
/// and is based on:
/// - the base image content digest (here only the tag name is hashed),
/// - resolved package versions (here only package names are hashed),
/// - the full hardware/mount/runtime policy.
///
/// Intended uses of this function:
/// - the `init` command, which runs before any resolution has occurred,
/// - internal lookup during rebuild, to locate old environments.
///
/// After `build`, the env_id stored in metadata comes from the lock file.
///
/// Every field is streamed into one blake3 hasher in a fixed order. The byte
/// stream defines the identity, so neither the order nor the record prefixes
/// may change.
pub fn compute_env_id(normalized: &NormalizedManifest) -> EnvIdentity {
    let mut state = blake3::Hasher::new();

    // Canonical JSON of the whole manifest, followed by the hex hash of the
    // base image *tag name* (no content digest exists before resolution).
    state.update(normalized.canonical_json().as_bytes());
    let tag_digest = blake3::hash(normalized.base_image.as_bytes())
        .to_hex()
        .to_string();
    state.update(tag_digest.as_bytes());

    // One `pkg:<name>` / `app:<name>` record per entry, in stored order.
    for name in &normalized.system_packages {
        state.update(b"pkg:").update(name.as_bytes());
    }
    for name in &normalized.gui_apps {
        state.update(b"app:").update(name.as_bytes());
    }

    // Hardware flags contribute a record only when enabled.
    if normalized.hardware_gpu {
        state.update(b"hw:gpu");
    }
    if normalized.hardware_audio {
        state.update(b"hw:audio");
    }

    // `mount:<label>:<host>:<container>` per mount, in stored order.
    for m in &normalized.mounts {
        state
            .update(b"mount:")
            .update(m.label.as_bytes())
            .update(b":")
            .update(m.host_path.as_bytes())
            .update(b":")
            .update(m.container_path.as_bytes());
    }

    state
        .update(b"backend:")
        .update(normalized.runtime_backend.as_bytes());

    if normalized.network_isolation {
        state.update(b"net:isolated");
    }

    // Resource limits contribute a record only when set.
    if let Some(shares) = normalized.cpu_shares {
        state.update(b"cpu:").update(shares.to_string().as_bytes());
    }
    if let Some(limit) = normalized.memory_limit_mb {
        state.update(b"mem:").update(limit.to_string().as_bytes());
    }

    let full_hex = state.finalize().to_hex().to_string();
    // The short id is the first 12 hex characters of the full id.
    let short_hex = full_hex[..12].to_owned();
    EnvIdentity {
        env_id: EnvId::new(full_hex),
        short_id: ShortId::new(short_hex),
    }
}
// Unit tests for the preliminary identity computed by `compute_env_id`.
#[cfg(test)]
mod tests {
use super::*;
use crate::manifest::parse_manifest_str;
// The same packages listed in a different order must yield the same identity.
#[test]
fn stable_id_for_equivalent_manifests() {
let a = parse_manifest_str(
r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "clang"]
"#,
)
.unwrap()
.normalize()
.unwrap();
let b = parse_manifest_str(
r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["clang", "git"]
"#,
)
.unwrap()
.normalize()
.unwrap();
assert_eq!(compute_env_id(&a), compute_env_id(&b));
}
// Adding a package must change the identity.
#[test]
fn different_inputs_produce_different_ids() {
let a = parse_manifest_str(
r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git"]
"#,
)
.unwrap()
.normalize()
.unwrap();
let b = parse_manifest_str(
r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "cmake"]
"#,
)
.unwrap()
.normalize()
.unwrap();
assert_ne!(compute_env_id(&a), compute_env_id(&b));
}
// Switching only the runtime backend must change the identity.
#[test]
fn backend_change_changes_id() {
let a = parse_manifest_str(
r#"
manifest_version = 1
[base]
image = "rolling"
[runtime]
backend = "namespace"
"#,
)
.unwrap()
.normalize()
.unwrap();
let b = parse_manifest_str(
r#"
manifest_version = 1
[base]
image = "rolling"
[runtime]
backend = "oci"
"#,
)
.unwrap()
.normalize()
.unwrap();
assert_ne!(compute_env_id(&a), compute_env_id(&b));
}
// The short id is 12 characters long and is a prefix of the full env id.
#[test]
fn short_id_is_12_chars() {
let n = parse_manifest_str(
r#"
manifest_version = 1
[base]
image = "rolling"
"#,
)
.unwrap()
.normalize()
.unwrap();
let id = compute_env_id(&n);
assert_eq!(id.short_id.as_str().len(), 12);
assert!(id.env_id.as_str().starts_with(id.short_id.as_str()));
}
}

View file

@ -0,0 +1,23 @@
//! Manifest parsing, normalization, lock files, and environment identity for Karapace.
//!
//! This crate defines the schema layer: TOML manifest parsing (`ManifestV1`),
//! normalized representations (`NormalizedManifest`), deterministic environment
//! identity computation (`compute_env_id`), lock file generation/verification
//! (`LockFile`), and built-in preset definitions.
pub mod identity;
pub mod lock;
pub mod manifest;
pub mod normalize;
pub mod preset;
pub mod types;
pub use identity::{compute_env_id, EnvIdentity};
pub use lock::{LockError, LockFile, ResolutionResult, ResolvedPackage};
pub use manifest::{
parse_manifest_file, parse_manifest_str, BaseSection, GuiSection, HardwareSection,
ManifestError, ManifestV1, MountsSection, ResourceLimits, RuntimeSection, SystemSection,
};
pub use normalize::{NormalizedManifest, NormalizedMount};
pub use preset::{get_preset, list_presets, Preset, BUILTIN_PRESETS};
pub use types::{EnvId, LayerHash, ObjectHash, ShortId};

View file

@ -0,0 +1,874 @@
use crate::identity::EnvIdentity;
use crate::manifest::ManifestError;
use crate::normalize::{NormalizedManifest, NormalizedMount};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
use thiserror::Error;
/// Errors produced while reading, writing, or verifying a lock file.
#[derive(Debug, Error)]
pub enum LockError {
/// A manifest-level failure, converted from [`ManifestError`].
#[error("manifest error: {0}")]
Manifest(#[from] ManifestError),
/// Filesystem failure while reading or writing the lock file.
#[error("lock file I/O error: {0}")]
Io(#[from] std::io::Error),
/// The lock file content could not be deserialized from TOML.
#[error("lock file parse error: {0}")]
Parse(#[from] toml::de::Error),
/// The lock file could not be serialized to TOML.
#[error("lock file serialize error: {0}")]
Serialize(#[from] toml::ser::Error),
/// The `env_id` stored in the lock differs from the one recomputed from
/// its contents (returned by `LockFile::verify_integrity`).
#[error("lock file env_id mismatch: lock has '{lock_id}', recomputed '{computed_id}'")]
EnvIdMismatch {
/// The `env_id` value stored in the lock file.
lock_id: String,
/// The `env_id` recomputed from the lock file's fields.
computed_id: String,
},
/// The manifest no longer matches what the lock recorded (returned by
/// `LockFile::verify_manifest_intent`); the payload describes the difference.
#[error("lock file manifest drift: {0}")]
ManifestDrift(String),
}
/// A resolved package with pinned version.
///
/// The derived `Ord` compares fields in declaration order, so sorting a list
/// of these orders by `name` first and `version` second.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct ResolvedPackage {
/// Package name.
pub name: String,
/// Pinned version string; hashed verbatim into the lock identity.
pub version: String,
}
/// Result of dependency resolution against a base image.
///
/// Passed to `LockFile::from_resolved` together with the normalized manifest.
#[derive(Debug, Clone)]
pub struct ResolutionResult {
/// Content hash (blake3) of the base image rootfs tarball.
pub base_image_digest: String,
/// Resolved packages with pinned versions.
///
/// Order does not matter here: `LockFile::from_resolved` sorts a copy.
pub resolved_packages: Vec<ResolvedPackage>,
}
/// The lock file captures the fully resolved state of an environment.
///
/// The env_id is computed deterministically from the locked fields,
/// not from unresolved manifest data. This guarantees:
/// same lockfile → same env_id → same environment.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LockFile {
/// Lock file format version; `from_resolved` writes `2`.
pub lock_version: u32,
/// Full hex identity, as produced by `compute_identity`.
pub env_id: String,
/// First 12 characters of `env_id`.
pub short_id: String,
// Base image identity
/// Base image name as declared in the manifest. Not an input to the
/// identity hash; only `base_image_digest` is.
pub base_image: String,
/// Content digest of the base image, copied from the resolution result.
pub base_image_digest: String,
// Resolved dependencies (version-pinned)
/// Version-pinned packages; `from_resolved` stores them sorted.
pub resolved_packages: Vec<ResolvedPackage>,
/// GUI apps, copied from the normalized manifest by `from_resolved`.
pub resolved_apps: Vec<String>,
// Runtime policy (included in hash contract)
/// Runtime backend name.
pub runtime_backend: String,
/// Whether GPU access is enabled.
pub hardware_gpu: bool,
/// Whether audio access is enabled.
pub hardware_audio: bool,
/// Whether network isolation is enabled.
pub network_isolation: bool,
// Mount policy
/// Bind mounts; defaults to empty when the key is absent from the file.
#[serde(default)]
pub mounts: Vec<NormalizedMount>,
// Resource limits
/// Optional CPU shares limit; `None` when the key is absent from the file.
#[serde(default)]
pub cpu_shares: Option<u64>,
/// Optional memory limit in MB; `None` when the key is absent from the file.
#[serde(default)]
pub memory_limit_mb: Option<u64>,
}
impl LockFile {
/// Generate a lock file from a manifest and resolution results.
///
/// The env_id is computed from the resolved state, ensuring that
/// identical resolved dependencies always produce the same identity.
pub fn from_resolved(normalized: &NormalizedManifest, resolution: &ResolutionResult) -> Self {
let mut resolved_packages = resolution.resolved_packages.clone();
resolved_packages.sort();
let lock = LockFile {
lock_version: 2,
env_id: String::new(), // computed below
short_id: String::new(),
base_image: normalized.base_image.clone(),
base_image_digest: resolution.base_image_digest.clone(),
resolved_packages,
resolved_apps: normalized.gui_apps.clone(),
runtime_backend: normalized.runtime_backend.clone(),
hardware_gpu: normalized.hardware_gpu,
hardware_audio: normalized.hardware_audio,
network_isolation: normalized.network_isolation,
mounts: normalized.mounts.clone(),
cpu_shares: normalized.cpu_shares,
memory_limit_mb: normalized.memory_limit_mb,
};
let identity = lock.compute_identity();
LockFile {
env_id: identity.env_id.into_inner(),
short_id: identity.short_id.into_inner(),
..lock
}
}
/// Compute the environment identity from the locked state.
///
/// This is the canonical hash computation. It uses only resolved,
/// pinned data — never unresolved package names or image tags.
pub fn compute_identity(&self) -> EnvIdentity {
let mut hasher = blake3::Hasher::new();
// Base image: content digest, not tag name
hasher.update(format!("base_digest:{}", self.base_image_digest).as_bytes());
// Resolved packages: name@version (sorted)
for pkg in &self.resolved_packages {
hasher.update(format!("pkg:{}@{}", pkg.name, pkg.version).as_bytes());
}
// Apps (sorted by normalize)
for app in &self.resolved_apps {
hasher.update(format!("app:{app}").as_bytes());
}
// Hardware policy
if self.hardware_gpu {
hasher.update(b"hw:gpu");
}
if self.hardware_audio {
hasher.update(b"hw:audio");
}
// Mount policy (sorted by label in normalize)
for mount in &self.mounts {
hasher.update(
format!(
"mount:{}:{}:{}",
mount.label, mount.host_path, mount.container_path
)
.as_bytes(),
);
}
// Runtime backend
hasher.update(format!("backend:{}", self.runtime_backend).as_bytes());
// Network isolation
if self.network_isolation {
hasher.update(b"net:isolated");
}
// Resource limits
if let Some(cpu) = self.cpu_shares {
hasher.update(format!("cpu:{cpu}").as_bytes());
}
if let Some(mem) = self.memory_limit_mb {
hasher.update(format!("mem:{mem}").as_bytes());
}
let hex = hasher.finalize().to_hex().to_string();
let short = hex[..12].to_owned();
EnvIdentity {
env_id: crate::types::EnvId::new(hex),
short_id: crate::types::ShortId::new(short),
}
}
/// Check that the lock file is internally consistent.
///
/// Recomputes the identity from the locked fields and compares it with the
/// stored `env_id` (the stored `short_id` is not compared).
///
/// # Errors
///
/// Returns [`LockError::EnvIdMismatch`] carrying both ids when they differ.
pub fn verify_integrity(&self) -> Result<EnvIdentity, LockError> {
    let recomputed = self.compute_identity();
    if self.env_id == recomputed.env_id.as_str() {
        Ok(recomputed)
    } else {
        Err(LockError::EnvIdMismatch {
            lock_id: self.env_id.clone(),
            computed_id: recomputed.env_id.into_inner(),
        })
    }
}
/// Check that a manifest's declared intent matches this lock file.
///
/// This catches cases where the manifest changed but the lock wasn't updated.
/// Compared, in this order (the first difference is reported):
/// 1. base image name,
/// 2. runtime backend,
/// 3. every declared system package must appear in the lock (the lock may
///    hold additional packages, so this is a one-way check),
/// 4. hardware policy (GPU / audio),
/// 5. GUI apps,
/// 6. network isolation,
/// 7. mounts,
/// 8. resource limits (CPU shares / memory limit).
///
/// Items 5–8 are all inputs to the identity hash, so a manifest edit that
/// touches them makes the lock stale just like the earlier items do.
///
/// # Errors
///
/// Returns [`LockError::ManifestDrift`] describing the first difference found.
pub fn verify_manifest_intent(&self, normalized: &NormalizedManifest) -> Result<(), LockError> {
    if self.base_image != normalized.base_image {
        return Err(LockError::ManifestDrift(format!(
            "base image changed: lock has '{}', manifest has '{}'",
            self.base_image, normalized.base_image
        )));
    }
    if self.runtime_backend != normalized.runtime_backend {
        return Err(LockError::ManifestDrift(format!(
            "runtime backend changed: lock has '{}', manifest has '{}'",
            self.runtime_backend, normalized.runtime_backend
        )));
    }

    // Check that all declared packages are present in the lock. A set keeps
    // each lookup O(1) instead of scanning the whole lock per package.
    let locked_names: std::collections::HashSet<&str> = self
        .resolved_packages
        .iter()
        .map(|p| p.name.as_str())
        .collect();
    for pkg in &normalized.system_packages {
        if !locked_names.contains(pkg.as_str()) {
            return Err(LockError::ManifestDrift(format!(
                "package '{pkg}' is in manifest but not in lock file. Run 'karapace build' to re-resolve."
            )));
        }
    }

    if self.hardware_gpu != normalized.hardware_gpu
        || self.hardware_audio != normalized.hardware_audio
    {
        return Err(LockError::ManifestDrift(
            "hardware policy changed. Run 'karapace build' to re-resolve.".to_owned(),
        ));
    }

    // The remaining fields are also part of the identity hash; without these
    // checks a manifest edit to any of them would go unnoticed.
    if self.resolved_apps != normalized.gui_apps {
        return Err(LockError::ManifestDrift(
            "gui apps changed. Run 'karapace build' to re-resolve.".to_owned(),
        ));
    }
    if self.network_isolation != normalized.network_isolation {
        return Err(LockError::ManifestDrift(
            "network isolation policy changed. Run 'karapace build' to re-resolve.".to_owned(),
        ));
    }
    if self.mounts != normalized.mounts {
        return Err(LockError::ManifestDrift(
            "mount policy changed. Run 'karapace build' to re-resolve.".to_owned(),
        ));
    }
    if self.cpu_shares != normalized.cpu_shares
        || self.memory_limit_mb != normalized.memory_limit_mb
    {
        return Err(LockError::ManifestDrift(
            "resource limits changed. Run 'karapace build' to re-resolve.".to_owned(),
        ));
    }
    Ok(())
}
/// Atomically write this lock file to `path` as pretty-printed TOML.
///
/// The content is written to a temporary file in the destination directory,
/// synced, and then renamed over `path`, so readers never observe a
/// partially written lock. The parent directory is synced afterwards on a
/// best-effort basis.
///
/// # Errors
///
/// Returns [`LockError::Serialize`] if TOML serialization fails and
/// [`LockError::Io`] if creating, writing, syncing, or renaming the
/// temporary file fails.
pub fn write_to_file(&self, path: impl AsRef<Path>) -> Result<(), LockError> {
    let path = path.as_ref();
    let content = toml::to_string_pretty(self)?;
    // `Path::parent` returns `Some("")` — not `None` — for a bare file name
    // such as "karapace.lock". An empty path cannot be opened, which would
    // silently skip the directory fsync below, so map it to ".".
    let dir = match path.parent() {
        Some(parent) if !parent.as_os_str().is_empty() => parent,
        _ => Path::new("."),
    };
    let mut tmp = tempfile::NamedTempFile::new_in(dir)?;
    std::io::Write::write_all(&mut tmp, content.as_bytes())?;
    tmp.as_file().sync_all()?;
    tmp.persist(path).map_err(|e| LockError::Io(e.error))?;
    // Fsync parent directory to ensure rename durability on power loss.
    // Deliberately best-effort: failures to open or sync it are ignored.
    if let Ok(f) = fs::File::open(dir) {
        let _ = f.sync_all();
    }
    Ok(())
}
pub fn read_from_file(path: impl AsRef<Path>) -> Result<Self, LockError> {
let content = fs::read_to_string(path)?;
Ok(toml::from_str(&content)?)
}
}
// Unit tests for lock file generation, verification, and identity hashing.
#[cfg(test)]
mod tests {
use super::*;
use crate::manifest::parse_manifest_str;
// Fixture: normalized manifest with base image "rolling" and packages git + clang.
fn sample_normalized() -> NormalizedManifest {
parse_manifest_str(
r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "clang"]
"#,
)
.unwrap()
.normalize()
.unwrap()
}
// Fixture: resolution with a 64-character digest and pinned clang/git versions.
fn sample_resolution() -> ResolutionResult {
ResolutionResult {
base_image_digest: "a".repeat(64),
resolved_packages: vec![
ResolvedPackage {
name: "clang".to_owned(),
version: "17.0.6-1".to_owned(),
},
ResolvedPackage {
name: "git".to_owned(),
version: "2.44.0-1".to_owned(),
},
],
}
}
// Writing a lock and reading it back must yield an equal value.
#[test]
fn lock_roundtrip() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let lock = LockFile::from_resolved(&normalized, &resolution);
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("karapace.lock");
lock.write_to_file(&path).unwrap();
let loaded = LockFile::read_from_file(&path).unwrap();
assert_eq!(lock, loaded);
}
// A freshly generated lock is internally consistent.
#[test]
fn lock_integrity_check_passes() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let lock = LockFile::from_resolved(&normalized, &resolution);
assert!(lock.verify_integrity().is_ok());
}
// Overwriting the stored env_id must be detected.
#[test]
fn lock_integrity_fails_on_tamper() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let mut lock = LockFile::from_resolved(&normalized, &resolution);
lock.env_id = "tampered".to_owned();
assert!(lock.verify_integrity().is_err());
}
// The lock stores the digest from the resolution and the tag from the manifest.
#[test]
fn lock_contains_real_digest() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let lock = LockFile::from_resolved(&normalized, &resolution);
// Digest is the actual image digest, not a hash of the tag name
assert_eq!(lock.base_image_digest, "a".repeat(64));
assert_eq!(lock.base_image, "rolling");
}
// Resolved packages keep their pinned versions and are stored sorted by name.
#[test]
fn lock_contains_pinned_versions() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let lock = LockFile::from_resolved(&normalized, &resolution);
assert_eq!(lock.resolved_packages.len(), 2);
assert_eq!(lock.resolved_packages[0].name, "clang");
assert_eq!(lock.resolved_packages[0].version, "17.0.6-1");
assert_eq!(lock.resolved_packages[1].name, "git");
assert_eq!(lock.resolved_packages[1].version, "2.44.0-1");
}
// Identical inputs give identical env_ids.
#[test]
fn same_resolution_same_identity() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let lock1 = LockFile::from_resolved(&normalized, &resolution);
let lock2 = LockFile::from_resolved(&normalized, &resolution);
assert_eq!(lock1.env_id, lock2.env_id);
}
// Changing one pinned version changes the env_id.
#[test]
fn different_versions_different_identity() {
let normalized = sample_normalized();
let res1 = sample_resolution();
let mut res2 = sample_resolution();
res2.resolved_packages[1].version = "2.45.0-1".to_owned();
let lock1 = LockFile::from_resolved(&normalized, &res1);
let lock2 = LockFile::from_resolved(&normalized, &res2);
assert_ne!(lock1.env_id, lock2.env_id);
}
// Changing the base image digest changes the env_id.
#[test]
fn different_image_digest_different_identity() {
let normalized = sample_normalized();
let mut res1 = sample_resolution();
let mut res2 = sample_resolution();
res1.base_image_digest = "a".repeat(64);
res2.base_image_digest = "b".repeat(64);
let lock1 = LockFile::from_resolved(&normalized, &res1);
let lock2 = LockFile::from_resolved(&normalized, &res2);
assert_ne!(lock1.env_id, lock2.env_id);
}
// The manifest a lock was generated from passes the intent check.
#[test]
fn manifest_intent_verified() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let lock = LockFile::from_resolved(&normalized, &resolution);
assert!(lock.verify_manifest_intent(&normalized).is_ok());
}
// A manifest whose base image changed after locking is reported as drift.
#[test]
fn manifest_drift_detected() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let lock = LockFile::from_resolved(&normalized, &resolution);
// Change the manifest
let mut drifted = normalized.clone();
drifted.base_image = "ubuntu/24.04".to_owned();
assert!(lock.verify_manifest_intent(&drifted).is_err());
}
// Toggling the GPU flag changes the env_id.
#[test]
fn includes_hardware_policy_in_identity() {
let mut n1 = sample_normalized();
let mut n2 = sample_normalized();
n1.hardware_gpu = false;
n2.hardware_gpu = true;
let res = sample_resolution();
let lock1 = LockFile::from_resolved(&n1, &res);
let lock2 = LockFile::from_resolved(&n2, &res);
assert_ne!(lock1.env_id, lock2.env_id);
}
// --- A1: Determinism Hardening ---
// 100 repeated generations from the same inputs must all agree.
#[test]
fn hash_stable_across_repeated_invocations() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let mut ids = Vec::new();
for _ in 0..100 {
let lock = LockFile::from_resolved(&normalized, &resolution);
ids.push(lock.env_id.clone());
}
let first = &ids[0];
for (i, id) in ids.iter().enumerate() {
assert_eq!(first, id, "invocation {i} produced different env_id");
}
}
// The order of packages in the resolution result must not affect the env_id.
#[test]
fn hash_stable_with_randomized_package_order() {
let normalized = sample_normalized();
// Create resolutions with packages in different orders
let res_ab = ResolutionResult {
base_image_digest: "a".repeat(64),
resolved_packages: vec![
ResolvedPackage {
name: "alpha".to_owned(),
version: "1.0".to_owned(),
},
ResolvedPackage {
name: "beta".to_owned(),
version: "2.0".to_owned(),
},
ResolvedPackage {
name: "gamma".to_owned(),
version: "3.0".to_owned(),
},
],
};
let res_ba = ResolutionResult {
base_image_digest: "a".repeat(64),
resolved_packages: vec![
ResolvedPackage {
name: "gamma".to_owned(),
version: "3.0".to_owned(),
},
ResolvedPackage {
name: "alpha".to_owned(),
version: "1.0".to_owned(),
},
ResolvedPackage {
name: "beta".to_owned(),
version: "2.0".to_owned(),
},
],
};
let lock_ab = LockFile::from_resolved(&normalized, &res_ab);
let lock_ba = LockFile::from_resolved(&normalized, &res_ba);
assert_eq!(
lock_ab.env_id, lock_ba.env_id,
"package order must not affect env_id (sorted in from_resolved)"
);
}
// Mounts that differ only in initial order hash equally once both are sorted by label.
#[test]
fn hash_stable_with_randomized_mount_order() {
use crate::normalize::NormalizedMount;
let mut n1 = sample_normalized();
n1.mounts = vec![
NormalizedMount {
label: "cache".to_owned(),
host_path: "/a".to_owned(),
container_path: "/b".to_owned(),
},
NormalizedMount {
label: "work".to_owned(),
host_path: "/c".to_owned(),
container_path: "/d".to_owned(),
},
];
let mut n2 = sample_normalized();
n2.mounts = vec![
NormalizedMount {
label: "work".to_owned(),
host_path: "/c".to_owned(),
container_path: "/d".to_owned(),
},
NormalizedMount {
label: "cache".to_owned(),
host_path: "/a".to_owned(),
container_path: "/b".to_owned(),
},
];
// Mounts are sorted by label in normalize(), but from_resolved doesn't re-sort.
// The hash input iterates mounts in order. For determinism, mounts must be
// pre-sorted by the caller (normalize). Test that identical sorted mounts hash equally.
n1.mounts.sort_by(|a, b| a.label.cmp(&b.label));
n2.mounts.sort_by(|a, b| a.label.cmp(&b.label));
let res = sample_resolution();
let lock1 = LockFile::from_resolved(&n1, &res);
let lock2 = LockFile::from_resolved(&n2, &res);
assert_eq!(lock1.env_id, lock2.env_id);
}
// Mount paths with forward slashes produce a reproducible, well-formed env_id.
#[test]
fn cross_platform_path_normalization() {
// Verify that path separators in mount specs don't break determinism.
// On all platforms, mount paths are stored as-is from the manifest
// (which uses forward slashes). This test confirms no OS-dependent
// path mangling occurs.
use crate::normalize::NormalizedMount;
let mut n1 = sample_normalized();
n1.mounts = vec![NormalizedMount {
label: "src".to_owned(),
host_path: "/home/user/src".to_owned(),
container_path: "/workspace".to_owned(),
}];
let res = sample_resolution();
let lock = LockFile::from_resolved(&n1, &res);
// Regenerating from the same inputs must give the same env_id. Note that
// no golden value is pinned here; this only checks reproducibility.
let lock2 = LockFile::from_resolved(&n1, &res);
assert_eq!(lock.env_id, lock2.env_id);
// env_id must be exactly 64 hex chars
assert_eq!(lock.env_id.len(), 64);
assert!(lock.env_id.chars().all(|c| c.is_ascii_hexdigit()));
}
// env_id and short_id are byte-for-byte equal across two generations.
#[test]
fn identical_inputs_produce_identical_hash_bytes() {
let normalized = sample_normalized();
let resolution = sample_resolution();
let lock1 = LockFile::from_resolved(&normalized, &resolution);
let lock2 = LockFile::from_resolved(&normalized, &resolution);
// Byte-level comparison of the full 64-char hex string
assert_eq!(
lock1.env_id.as_bytes(),
lock2.env_id.as_bytes(),
"hash bytes must be identical for identical inputs"
);
assert_eq!(lock1.short_id.as_bytes(), lock2.short_id.as_bytes(),);
}
// --- IG-M5: Golden-value cross-machine determinism tests ---
//
// These tests hardcode expected blake3 hashes for fixed inputs.
// If any of these fail, it means compute_identity() has changed behavior,
// which would break cross-machine reproducibility and existing lock files.
// The golden values were computed once and must remain stable forever.
// Helper: build a lock directly from literal values, bypassing manifest
// parsing and normalize(). No GUI apps and no resource limits are set.
fn golden_lock(
base_digest: &str,
packages: &[(&str, &str)],
mounts: &[(&str, &str, &str)],
backend: &str,
gpu: bool,
audio: bool,
network_isolation: bool,
) -> LockFile {
let resolved_packages: Vec<ResolvedPackage> = packages
.iter()
.map(|(n, v)| ResolvedPackage {
name: n.to_string(),
version: v.to_string(),
})
.collect();
let mount_specs: Vec<NormalizedMount> = mounts
.iter()
.map(|(l, h, c)| NormalizedMount {
label: l.to_string(),
host_path: h.to_string(),
container_path: c.to_string(),
})
.collect();
let normalized = NormalizedManifest {
manifest_version: 1,
base_image: "rolling".to_owned(),
system_packages: packages.iter().map(|(n, _)| n.to_string()).collect(),
gui_apps: Vec::new(),
hardware_gpu: gpu,
hardware_audio: audio,
mounts: mount_specs,
runtime_backend: backend.to_owned(),
network_isolation,
cpu_shares: None,
memory_limit_mb: None,
};
let resolution = ResolutionResult {
base_image_digest: base_digest.to_owned(),
resolved_packages,
};
LockFile::from_resolved(&normalized, &resolution)
}
// Golden hash: no packages, no mounts, "mock" backend, all flags off.
#[test]
fn golden_identity_empty_manifest() {
let lock = golden_lock("sha256:abc123", &[], &[], "mock", false, false, false);
assert_eq!(
lock.env_id, "aabaeaeda3b27db42054f64719a16afd49e72b4fc6e8493e2fce9d862d240806",
"golden hash for empty manifest must be stable across all platforms"
);
}
// Golden hash: two pinned packages on the "namespace" backend.
#[test]
fn golden_identity_with_packages() {
let lock = golden_lock(
"sha256:abc123",
&[("curl", "7.88.1"), ("git", "2.39.2")],
&[],
"namespace",
false,
false,
false,
);
assert_eq!(
lock.env_id, "dfea3163e5925ee788a97fae24d9ec08f774c29c64c9180befe771d877e62f18",
"golden hash for manifest with packages must be stable across all platforms"
);
}
// Golden hash: one package, one mount, GPU and audio enabled.
#[test]
fn golden_identity_with_mounts_and_hardware() {
let lock = golden_lock(
"sha256:abc123",
&[("vim", "9.0.1")],
&[("home", "/home/user", "/home")],
"namespace",
true,
true,
false,
);
assert_eq!(
lock.env_id, "d6ca89829da264240d0508bd58bffc28c2014f643426bbecff3db5a525793546",
"golden hash for manifest with mounts+hardware must be stable across all platforms"
);
}
// Golden hash: empty manifest with network isolation on; must differ from the non-isolated one.
#[test]
fn golden_identity_network_isolation_differs() {
let lock = golden_lock("sha256:abc123", &[], &[], "mock", false, false, true);
assert_eq!(
lock.env_id, "dcdae57b3749d0aa2d3948de9fde99ceedad34deaef9b618c2d9f939dac25596",
"golden hash for network-isolated manifest must be stable across all platforms"
);
// Must differ from the non-isolated empty manifest
assert_ne!(
lock.env_id, "aabaeaeda3b27db42054f64719a16afd49e72b4fc6e8493e2fce9d862d240806",
"network isolation must produce a different hash"
);
}
// Helper: like `golden_lock`, but also takes GUI apps and resource limits.
// Apps are passed through in the given order (normalize() is bypassed).
#[allow(clippy::too_many_arguments)]
fn golden_lock_full(
base_digest: &str,
packages: &[(&str, &str)],
mounts: &[(&str, &str, &str)],
apps: &[&str],
backend: &str,
gpu: bool,
audio: bool,
network_isolation: bool,
cpu_shares: Option<u64>,
memory_limit_mb: Option<u64>,
) -> LockFile {
let resolved_packages: Vec<ResolvedPackage> = packages
.iter()
.map(|(n, v)| ResolvedPackage {
name: n.to_string(),
version: v.to_string(),
})
.collect();
let mount_specs: Vec<NormalizedMount> = mounts
.iter()
.map(|(l, h, c)| NormalizedMount {
label: l.to_string(),
host_path: h.to_string(),
container_path: c.to_string(),
})
.collect();
let normalized = NormalizedManifest {
manifest_version: 1,
base_image: "rolling".to_owned(),
system_packages: packages.iter().map(|(n, _)| n.to_string()).collect(),
gui_apps: apps.iter().map(ToString::to_string).collect(),
hardware_gpu: gpu,
hardware_audio: audio,
mounts: mount_specs,
runtime_backend: backend.to_owned(),
network_isolation,
cpu_shares,
memory_limit_mb,
};
let resolution = ResolutionResult {
base_image_digest: base_digest.to_owned(),
resolved_packages,
};
LockFile::from_resolved(&normalized, &resolution)
}
// Golden hash: only cpu_shares set.
#[test]
fn golden_identity_with_cpu_shares() {
let lock = golden_lock_full(
"sha256:abc123",
&[],
&[],
&[],
"mock",
false,
false,
false,
Some(1024),
None,
);
assert_eq!(
lock.env_id, "d966f9ee1c5e8959ae29d0483c45fc66813ec47201aa9f26c6371336b3dfd252",
"golden hash for cpu_shares=1024 must be stable across all platforms"
);
}
// Golden hash: only memory_limit_mb set.
#[test]
fn golden_identity_with_memory_limit() {
let lock = golden_lock_full(
"sha256:abc123",
&[],
&[],
&[],
"mock",
false,
false,
false,
None,
Some(4096),
);
assert_eq!(
lock.env_id, "74823889e305b7b28394508b5813568faf9c814b4ef8f1f97e8d3dcd9a7a6bae",
"golden hash for memory_limit_mb=4096 must be stable across all platforms"
);
}
// Golden hash: only GUI apps set, hashed in the order given here.
#[test]
fn golden_identity_with_apps() {
let lock = golden_lock_full(
"sha256:abc123",
&[],
&[],
&["firefox", "code"],
"mock",
false,
false,
false,
None,
None,
);
assert_eq!(
lock.env_id, "1aaf066c7b1e18178e838b0cf33c0bc67cd7401e586df826daa9033178ccfdf3",
"golden hash for gui_apps=[firefox,code] must be stable across all platforms"
);
}
// Golden hash: every field populated at once.
#[test]
fn golden_identity_with_cpu_and_memory() {
let lock = golden_lock_full(
"sha256:abc123",
&[("curl", "7.88.1")],
&[("data", "/mnt/data", "/data")],
&["vlc"],
"namespace",
true,
true,
true,
Some(2048),
Some(8192),
);
assert_eq!(
lock.env_id, "44f9547036b4f24f8fe32844f2672804020c6260e29b7f72e17fd29d441ebc27",
"golden hash for fully-populated manifest must be stable across all platforms"
);
}
// Golden hashes for GPU-only and audio-only, which must also differ from each other.
#[test]
fn golden_identity_gpu_only_differs_from_audio_only() {
let gpu_lock = golden_lock_full(
"sha256:abc123",
&[],
&[],
&[],
"mock",
true,
false,
false,
None,
None,
);
let audio_lock = golden_lock_full(
"sha256:abc123",
&[],
&[],
&[],
"mock",
false,
true,
false,
None,
None,
);
assert_eq!(
gpu_lock.env_id, "f761765ba48777bcc64c2cd5169cb44be27bcd2d6587c64c28bc98fa0964b266",
"golden hash for gpu-only must be stable"
);
assert_eq!(
audio_lock.env_id, "428d91b41a03c1625e01bab1278ef231fb186833bff80a6bdc8227a2276f4318",
"golden hash for audio-only must be stable"
);
assert_ne!(
gpu_lock.env_id, audio_lock.env_id,
"gpu-only and audio-only must produce different hashes"
);
}
// Flip one field at a time and confirm each change alters the env_id.
#[test]
fn hash_sensitive_to_all_fields() {
let base_norm = sample_normalized();
let base_res = sample_resolution();
let base_id = LockFile::from_resolved(&base_norm, &base_res).env_id;
// Change each field and verify the hash changes
let mut n = base_norm.clone();
n.network_isolation = !n.network_isolation;
assert_ne!(
LockFile::from_resolved(&n, &base_res).env_id,
base_id,
"network_isolation"
);
let mut n = base_norm.clone();
n.cpu_shares = Some(1024);
assert_ne!(
LockFile::from_resolved(&n, &base_res).env_id,
base_id,
"cpu_shares"
);
let mut n = base_norm.clone();
n.memory_limit_mb = Some(4096);
assert_ne!(
LockFile::from_resolved(&n, &base_res).env_id,
base_id,
"memory_limit_mb"
);
let mut n = base_norm.clone();
n.runtime_backend = "oci".to_owned();
assert_ne!(
LockFile::from_resolved(&n, &base_res).env_id,
base_id,
"runtime_backend"
);
let mut n = base_norm.clone();
n.gui_apps = vec!["new-app".to_owned()];
assert_ne!(
LockFile::from_resolved(&n, &base_res).env_id,
base_id,
"gui_apps"
);
}
}

View file

@ -0,0 +1,192 @@
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::fs;
use std::path::Path;
use thiserror::Error;
/// Errors produced while reading, parsing, or validating a manifest.
#[derive(Debug, Error)]
pub enum ManifestError {
/// The manifest file could not be read.
#[error("failed to read manifest file: {0}")]
Io(#[from] std::io::Error),
/// The manifest text is not valid TOML or does not match the schema.
#[error("failed to parse manifest: {0}")]
ParseToml(#[from] toml::de::Error),
/// `manifest_version` holds a value other than 1; the payload is the value found.
#[error("unsupported manifest_version: {0}, expected 1")]
UnsupportedVersion(u32),
/// `base.image` is empty.
#[error("base.image must not be empty")]
EmptyBaseImage,
/// A mount entry has an empty label.
#[error("mount label must not be empty")]
EmptyMountLabel,
/// A mount spec is not of the form `<host>:<container>`.
#[error("invalid mount declaration for '{label}': '{spec}', expected '<host>:<container>'")]
InvalidMount { label: String, spec: String },
}
/// Version 1 manifest as deserialized from TOML.
///
/// Unknown top-level keys are rejected. Only `manifest_version` and `base`
/// are required; every other section falls back to its `Default`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct ManifestV1 {
/// Declared schema version. Deserialization accepts any `u32` here.
pub manifest_version: u32,
/// Required `[base]` table.
pub base: BaseSection,
/// Optional `[system]` table.
#[serde(default)]
pub system: SystemSection,
/// Optional `[gui]` table.
#[serde(default)]
pub gui: GuiSection,
/// Optional `[hardware]` table.
#[serde(default)]
pub hardware: HardwareSection,
/// Optional `[mounts]` table.
#[serde(default)]
pub mounts: MountsSection,
/// Optional `[runtime]` table.
#[serde(default)]
pub runtime: RuntimeSection,
}
/// The `[base]` table: selects the base image. Unknown keys are rejected.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct BaseSection {
/// Base image name (required), e.g. `"rolling"`.
pub image: String,
}
/// The `[system]` table: system packages to install. Unknown keys are rejected.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct SystemSection {
/// Package names as written in the manifest; empty when omitted.
#[serde(default)]
pub packages: Vec<String>,
}
/// The `[gui]` table: GUI applications. Unknown keys are rejected.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct GuiSection {
/// Application names as written in the manifest; empty when omitted.
#[serde(default)]
pub apps: Vec<String>,
}
/// The `[hardware]` table: hardware access flags. Unknown keys are rejected.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct HardwareSection {
/// GPU flag; `false` when omitted.
#[serde(default)]
pub gpu: bool,
/// Audio flag; `false` when omitted.
#[serde(default)]
pub audio: bool,
}
/// The `[mounts]` table: arbitrary `label = "spec"` entries.
///
/// Every key in the table is collected into `entries`, so this section
/// cannot reject unknown keys the way the other sections do.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct MountsSection {
/// Mount label → raw spec string (e.g. `workspace = "./:/workspace"`),
/// kept ordered by label by the `BTreeMap`.
#[serde(flatten)]
pub entries: BTreeMap<String, String>,
}
/// The `[runtime]` table: backend selection and runtime policy. Unknown keys are rejected.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct RuntimeSection {
/// Runtime backend name; `"namespace"` when omitted.
#[serde(default = "default_backend")]
pub backend: String,
/// Network isolation flag; `false` when omitted.
#[serde(default)]
pub network_isolation: bool,
/// Optional `[runtime.resource_limits]` sub-table; no limits when omitted.
#[serde(default)]
pub resource_limits: ResourceLimits,
}
/// Values used when the `[runtime]` table is missing entirely: the default
/// backend, no network isolation, and no resource limits.
impl Default for RuntimeSection {
    fn default() -> Self {
        let backend = default_backend();
        let resource_limits = ResourceLimits::default();
        Self {
            backend,
            network_isolation: false,
            resource_limits,
        }
    }
}
/// The `[runtime.resource_limits]` table. Unknown keys are rejected.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct ResourceLimits {
/// Optional CPU shares value; `None` when omitted.
#[serde(default)]
pub cpu_shares: Option<u64>,
/// Optional memory limit in MB; `None` when omitted.
#[serde(default)]
pub memory_limit_mb: Option<u64>,
}
/// Backend name applied when `runtime.backend` is not given in the manifest.
fn default_backend() -> String {
    String::from("namespace")
}
pub fn parse_manifest_str(input: &str) -> Result<ManifestV1, ManifestError> {
Ok(toml::from_str(input)?)
}
pub fn parse_manifest_file(path: impl AsRef<Path>) -> Result<ManifestV1, ManifestError> {
let content = fs::read_to_string(path)?;
parse_manifest_str(&content)
}
// Unit tests for TOML manifest deserialization.
#[cfg(test)]
mod tests {
use super::*;
// A manifest that sets every section parses and exposes the declared values.
#[test]
fn parses_full_manifest() {
let input = r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["clang", "cmake", "git"]
[gui]
apps = ["ide", "debugger"]
[hardware]
gpu = true
audio = true
[mounts]
workspace = "./:/workspace"
[runtime]
backend = "oci"
network_isolation = true
[runtime.resource_limits]
cpu_shares = 1024
memory_limit_mb = 4096
"#;
let manifest = parse_manifest_str(input).expect("should parse");
assert_eq!(manifest.manifest_version, 1);
assert_eq!(manifest.base.image, "rolling");
assert_eq!(manifest.system.packages.len(), 3);
assert_eq!(manifest.runtime.backend, "oci");
assert!(manifest.runtime.network_isolation);
assert_eq!(manifest.runtime.resource_limits.cpu_shares, Some(1024));
}
// With only the required keys, runtime falls back to "namespace" and no isolation.
#[test]
fn parses_minimal_manifest() {
let input = r#"
manifest_version = 1
[base]
image = "rolling"
"#;
let manifest = parse_manifest_str(input).expect("should parse");
assert_eq!(manifest.runtime.backend, "namespace");
assert!(!manifest.runtime.network_isolation);
}
// An unrecognized key inside `[base]` is a parse error.
#[test]
fn rejects_unknown_fields() {
let input = r#"
manifest_version = 1
[base]
image = "rolling"
unknown_field = true
"#;
assert!(parse_manifest_str(input).is_err());
}
// A manifest without the `[base]` table is a parse error.
#[test]
fn rejects_missing_base() {
let input = r"
manifest_version = 1
";
assert!(parse_manifest_str(input).is_err());
}
}

View file

@ -0,0 +1,224 @@
use crate::manifest::{ManifestError, ManifestV1};
use serde::{Deserialize, Serialize};
/// Canonical, sorted, deduplicated representation of a parsed manifest.
///
/// All optional fields are resolved to defaults, packages are sorted, and mounts
/// are validated. This is the input to identity hashing and lock file generation.
///
/// `canonical_json` serializes this struct directly, so changing the field
/// names or their declaration order presumably changes the canonical output;
/// treat the layout as part of the format.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct NormalizedManifest {
/// Manifest schema version; `ManifestV1::normalize` only accepts `1`.
pub manifest_version: u32,
/// Base image name, trimmed and guaranteed non-empty by `normalize`.
pub base_image: String,
/// System packages: trimmed, blank entries dropped, sorted, deduplicated.
pub system_packages: Vec<String>,
/// GUI apps: trimmed, blank entries dropped, sorted, deduplicated.
pub gui_apps: Vec<String>,
/// Copied from `hardware.gpu`.
pub hardware_gpu: bool,
/// Copied from `hardware.audio`.
pub hardware_audio: bool,
/// Validated mounts, sorted by label.
pub mounts: Vec<NormalizedMount>,
/// Runtime backend name, trimmed and lowercased.
pub runtime_backend: String,
/// Copied from `runtime.network_isolation`.
pub network_isolation: bool,
/// Copied from `runtime.resource_limits.cpu_shares`; `None` when unset.
pub cpu_shares: Option<u64>,
/// Copied from `runtime.resource_limits.memory_limit_mb`; `None` when unset.
pub memory_limit_mb: Option<u64>,
}
/// A validated bind-mount specification with label, host path, and container path.
///
/// Built by `ManifestV1::normalize` from one `label = "host:container"` entry
/// of the `[mounts]` table; the spec is split at its first `:`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct NormalizedMount {
/// Mount label (the TOML key), trimmed and non-empty.
pub label: String,
/// Text before the first `:` of the spec, trimmed and non-empty.
pub host_path: String,
/// Text after the first `:` of the spec, trimmed and non-empty.
pub container_path: String,
}
impl ManifestV1 {
/// Normalize the manifest: validate fields, sort packages, resolve defaults.
pub fn normalize(&self) -> Result<NormalizedManifest, ManifestError> {
if self.manifest_version != 1 {
return Err(ManifestError::UnsupportedVersion(self.manifest_version));
}
let base_image = self.base.image.trim().to_owned();
if base_image.is_empty() {
return Err(ManifestError::EmptyBaseImage);
}
let mut mounts = Vec::with_capacity(self.mounts.entries.len());
for (label, spec) in &self.mounts.entries {
let trimmed_label = label.trim().to_owned();
if trimmed_label.is_empty() {
return Err(ManifestError::EmptyMountLabel);
}
let (host_path, container_path) = parse_mount_spec(label, spec)?;
mounts.push(NormalizedMount {
label: trimmed_label,
host_path,
container_path,
});
}
mounts.sort_by(|a, b| a.label.cmp(&b.label));
let runtime_backend = self.runtime.backend.trim().to_lowercase();
Ok(NormalizedManifest {
manifest_version: self.manifest_version,
base_image,
system_packages: normalize_string_list(&self.system.packages),
gui_apps: normalize_string_list(&self.gui.apps),
hardware_gpu: self.hardware.gpu,
hardware_audio: self.hardware.audio,
mounts,
runtime_backend,
network_isolation: self.runtime.network_isolation,
cpu_shares: self.runtime.resource_limits.cpu_shares,
memory_limit_mb: self.runtime.resource_limits.memory_limit_mb,
})
}
}
impl NormalizedManifest {
    /// Serialize this manifest to a single-line JSON string with `serde_json`.
    ///
    /// # Panics
    /// Panics if serialization fails, which is not expected for this type.
    pub fn canonical_json(&self) -> String {
        let encoded = serde_json::to_string(self);
        encoded.expect("normalized manifest serialization is infallible")
    }
}
/// Split a mount spec of the form `host:container` at its first `:`.
///
/// Both halves are trimmed and must be non-empty. `label` is only used to
/// build the error value.
///
/// # Errors
/// Returns `ManifestError::InvalidMount` (carrying `label` and `spec` as
/// given) when there is no `:` or when either half is blank.
fn parse_mount_spec(label: &str, spec: &str) -> Result<(String, String), ManifestError> {
    let halves = spec
        .split_once(':')
        .map(|(host, container)| (host.trim(), container.trim()));

    match halves {
        Some((host, container)) if !host.is_empty() && !container.is_empty() => {
            Ok((host.to_owned(), container.to_owned()))
        }
        _ => Err(ManifestError::InvalidMount {
            label: label.to_owned(),
            spec: spec.to_owned(),
        }),
    }
}
/// Canonicalize a list of names: trim each entry, drop blank ones, then sort
/// and remove duplicates.
fn normalize_string_list(values: &[String]) -> Vec<String> {
    let mut cleaned: Vec<String> = Vec::new();
    for raw in values {
        let item = raw.trim();
        if !item.is_empty() {
            cleaned.push(item.to_owned());
        }
    }
    cleaned.sort();
    // After sorting, duplicates are adjacent, so `dedup` removes all of them.
    cleaned.dedup();
    cleaned
}
#[cfg(test)]
mod tests {
    use crate::manifest::parse_manifest_str;

    /// Packages are deduplicated and sorted, mounts are ordered by label, and
    /// the backend falls back to its default.
    #[test]
    fn normalizes_and_sorts_deterministically() {
        let toml_text = r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "cmake", "git", "clang"]
[gui]
apps = ["debugger", "ide"]
[hardware]
gpu = true
audio = false
[mounts]
workspace = "./:/workspace"
cache = "~/.cache:/cache"
"#;
        let parsed = parse_manifest_str(toml_text).unwrap();
        let canonical = parsed.normalize().unwrap();
        assert_eq!(canonical.system_packages, vec!["clang", "cmake", "git"]);
        assert_eq!(canonical.gui_apps, vec!["debugger", "ide"]);
        assert_eq!(canonical.mounts[0].label, "cache");
        assert_eq!(canonical.mounts[1].label, "workspace");
        assert_eq!(canonical.runtime_backend, "namespace");
    }

    /// Package order in the manifest must not influence the canonical JSON.
    #[test]
    fn equivalent_manifests_produce_same_canonical_json() {
        let canonical_json_of = |toml_text: &str| {
            parse_manifest_str(toml_text)
                .unwrap()
                .normalize()
                .unwrap()
                .canonical_json()
        };
        let first = canonical_json_of(
            r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "clang"]
"#,
        );
        let second = canonical_json_of(
            r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["clang", "git"]
"#,
        );
        assert_eq!(first, second);
    }

    /// A whitespace-only base image parses but fails normalization.
    #[test]
    fn rejects_empty_base_image() {
        let toml_text = r#"
manifest_version = 1
[base]
image = " "
"#;
        let parsed = parse_manifest_str(toml_text).unwrap();
        let outcome = parsed.normalize();
        assert!(outcome.is_err());
    }

    /// A mount spec without a `:` separator fails normalization.
    #[test]
    fn rejects_invalid_mount_spec() {
        let toml_text = r#"
manifest_version = 1
[base]
image = "rolling"
[mounts]
workspace = "./no-colon"
"#;
        let parsed = parse_manifest_str(toml_text).unwrap();
        let outcome = parsed.normalize();
        assert!(outcome.is_err());
    }

    /// The backend name is carried into the normalized form, lowercased.
    #[test]
    fn runtime_backend_included_in_normalization() {
        let toml_text = r#"
manifest_version = 1
[base]
image = "rolling"
[runtime]
backend = "OCI"
"#;
        let parsed = parse_manifest_str(toml_text).unwrap();
        let canonical = parsed.normalize().unwrap();
        assert_eq!(canonical.runtime_backend, "oci");
    }
}

View file

@ -0,0 +1,143 @@
use serde::{Deserialize, Serialize};
/// A named, ready-made manifest that ships with the crate.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Preset {
/// Identifier used to look the preset up (see `get_preset`).
pub name: &'static str,
/// One-line human-readable summary of the preset.
pub description: &'static str,
/// Manifest source text in TOML, accepted by `parse_manifest_str`.
pub manifest: &'static str,
}
/// The presets compiled into the crate, in listing order.
///
/// Every entry uses the `rolling` base image and the `namespace` backend; they
/// differ in the packages and hardware flags they request. Names are expected
/// to be unique (checked by the tests in this module).
pub const BUILTIN_PRESETS: &[Preset] = &[
Preset {
name: "dev",
description: "Development environment with common build tools",
manifest: r#"manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "curl", "wget", "vim", "gcc", "make", "cmake"]
[runtime]
backend = "namespace"
"#,
},
Preset {
name: "dev-rust",
description: "Rust development environment",
manifest: r#"manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "curl", "gcc", "make", "rustup"]
[runtime]
backend = "namespace"
"#,
},
Preset {
name: "dev-python",
description: "Python development environment",
manifest: r#"manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "curl", "python3", "python3-pip", "python3-venv"]
[runtime]
backend = "namespace"
"#,
},
Preset {
name: "gui-app",
description: "GUI application environment with GPU and audio passthrough",
manifest: r#"manifest_version = 1
[base]
image = "rolling"
[hardware]
gpu = true
audio = true
[runtime]
backend = "namespace"
"#,
},
Preset {
name: "gaming",
description: "Gaming environment with GPU, audio, and Vulkan support",
manifest: r#"manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["mesa-dri", "vulkan-loader", "libvulkan1", "alsa-plugins"]
[hardware]
gpu = true
audio = true
[runtime]
backend = "namespace"
"#,
},
Preset {
name: "minimal",
description: "Minimal environment with no extra packages",
manifest: r#"manifest_version = 1
[base]
image = "rolling"
[runtime]
backend = "namespace"
"#,
},
];
/// Look up a built-in preset by its exact (case-sensitive) name.
///
/// Returns `None` when no preset in `BUILTIN_PRESETS` has that name.
pub fn get_preset(name: &str) -> Option<&'static Preset> {
    for preset in BUILTIN_PRESETS {
        if preset.name == name {
            return Some(preset);
        }
    }
    None
}
/// Return every built-in preset, in the order they are declared in
/// `BUILTIN_PRESETS`.
pub fn list_presets() -> &'static [Preset] {
BUILTIN_PRESETS
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every shipped preset must at least be valid manifest TOML.
    #[test]
    fn all_presets_parse() {
        BUILTIN_PRESETS.iter().for_each(|preset| {
            let result = crate::parse_manifest_str(preset.manifest);
            assert!(
                result.is_ok(),
                "preset '{}' failed to parse: {:?}",
                preset.name,
                result.err()
            );
        });
    }

    /// Lookup succeeds for a known name and fails for an unknown one.
    #[test]
    fn get_preset_by_name() {
        let known = get_preset("dev");
        assert!(known.is_some());
        let unknown = get_preset("nonexistent");
        assert!(unknown.is_none());
    }

    /// Deduplicating the sorted names must not shrink the list.
    #[test]
    fn all_presets_have_unique_names() {
        let total = BUILTIN_PRESETS.len();
        let mut names: Vec<&str> = Vec::with_capacity(total);
        for preset in BUILTIN_PRESETS {
            names.push(preset.name);
        }
        names.sort_unstable();
        names.dedup();
        assert_eq!(names.len(), total);
    }
}

View file

@ -0,0 +1,158 @@
//! Newtype wrappers for string identifiers, providing compile-time type safety.
//!
//! All newtypes serialize/deserialize as plain strings for backward compatibility.
use serde::{Deserialize, Serialize};
use std::fmt;
use std::ops::Deref;
// Generates a public tuple struct around `String` plus the conversions,
// comparisons and borrow impls shared by all identifier types in this module.
// Any attributes written before the type name (typically doc comments) are
// forwarded onto the generated struct.
macro_rules! string_newtype {
    ($(#[$meta:meta])* $name:ident) => {
        $(#[$meta])*
        #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
        #[serde(transparent)]
        pub struct $name(String);

        impl $name {
            /// Build a value from anything convertible into a `String`.
            pub fn new(s: impl Into<String>) -> Self {
                let inner: String = s.into();
                Self(inner)
            }

            /// Borrow the wrapped text.
            pub fn as_str(&self) -> &str {
                self.0.as_str()
            }

            /// Unwrap into the owned `String`.
            pub fn into_inner(self) -> String {
                let Self(inner) = self;
                inner
            }
        }

        impl From<String> for $name {
            fn from(s: String) -> Self {
                Self(s)
            }
        }

        impl From<&str> for $name {
            fn from(s: &str) -> Self {
                Self(String::from(s))
            }
        }

        impl Deref for $name {
            type Target = str;

            fn deref(&self) -> &str {
                self.0.as_str()
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                self.0.as_str()
            }
        }

        impl AsRef<std::path::Path> for $name {
            fn as_ref(&self) -> &std::path::Path {
                std::path::Path::new(self.0.as_str())
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Written verbatim; width/padding flags are not applied.
                f.write_str(self.0.as_str())
            }
        }

        impl PartialEq<str> for $name {
            fn eq(&self, other: &str) -> bool {
                self.0.as_str() == other
            }
        }

        impl PartialEq<String> for $name {
            fn eq(&self, other: &String) -> bool {
                &self.0 == other
            }
        }

        impl PartialEq<$name> for String {
            fn eq(&self, other: &$name) -> bool {
                self == &other.0
            }
        }
    };
}
string_newtype!(
/// Full 64-character hex environment identifier, derived from locked manifest content.
///
/// The wrapper itself does not check length or character set; any string is accepted.
EnvId
);
string_newtype!(
/// Truncated 12-character prefix of an [`EnvId`], used for display.
///
/// The wrapper itself does not check length; any string is accepted.
ShortId
);
string_newtype!(
/// Blake3 hash of a content-addressable object in the store.
///
/// Held as text; the wrapper does not validate the value.
ObjectHash
);
string_newtype!(
/// Blake3 hash identifying a layer manifest.
///
/// Held as text; the wrapper does not validate the value.
LayerHash
);
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display`, `as_str` and `AsRef<str>` all expose the same text.
    #[test]
    fn env_id_display_and_as_ref() {
        let id = EnvId::new("abc123");
        let shown = id.to_string();
        assert_eq!(shown, "abc123");
        assert_eq!(id.as_str(), "abc123");
        assert_eq!(<EnvId as AsRef<str>>::as_ref(&id), "abc123");
    }

    /// Serialization is a bare JSON string and round-trips losslessly.
    #[test]
    fn env_id_serde_roundtrip() {
        let original = EnvId::new("deadbeef");
        let encoded = serde_json::to_string(&original).unwrap();
        assert_eq!(encoded, "\"deadbeef\"");
        let decoded: EnvId = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    /// `From<&str>` keeps the text unchanged.
    #[test]
    fn short_id_from_str() {
        let sid: ShortId = "abc123def456".into();
        assert_eq!(sid.as_str(), "abc123def456");
    }

    /// `into_inner` hands back the wrapped `String`.
    #[test]
    fn object_hash_into_inner() {
        let hash = ObjectHash::new(String::from("hash_value"));
        let inner = hash.into_inner();
        assert_eq!(inner, "hash_value");
    }

    /// Equality follows the wrapped text.
    #[test]
    fn layer_hash_equality() {
        let first = LayerHash::new("same");
        let twin = LayerHash::new("same");
        let other = LayerHash::new("diff");
        assert_eq!(first, twin);
        assert_ne!(first, other);
    }

    /// `From<String>` keeps the text unchanged.
    #[test]
    fn env_id_from_string() {
        let text = String::from("test_id");
        let id = EnvId::from(text);
        assert_eq!(id.as_str(), "test_id");
    }
}