feat: karapace-schema — manifest v1, normalization, identity hashing, lock file v2

- TOML manifest parsing with strict schema validation (deny_unknown_fields)
- Deterministic normalization: sorted packages, deduplication, canonical JSON
- Two-phase identity: preliminary (from manifest) and canonical (from lock)
- Lock file v2: resolved packages with pinned versions, base image content digest
- Dual lock verification: integrity (hash) and manifest intent (drift detection)
- Built-in presets: dev, dev-rust, dev-python, gui-app, gaming, minimal
- Blake3 256-bit hashing throughout
2026-05-11 05:37:19 +00:00 · 2026-02-22 18:36:15 +01:00 · 2026-02-22 18:36:15 +01:00 · cdd13755a0
commit cdd13755a0
parent 78d40c0d0a
9 changed files with 3056 additions and 0 deletions
--- a/crates/karapace-schema/Cargo.toml
+++ b/crates/karapace-schema/Cargo.toml
@ -0,0 +1,18 @@
+[package]
+name = "karapace-schema"
+description = "Manifest parsing, normalization, identity hashing, and lock file for Karapace"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+
+[lints]
+workspace = true
+
+[dependencies]
+serde.workspace = true
+serde_json.workspace = true
+thiserror.workspace = true
+toml.workspace = true
+blake3.workspace = true
+tempfile.workspace = true
--- a/crates/karapace-schema/karapace-schema.cdx.json
+++ b/crates/karapace-schema/karapace-schema.cdx.json
--- a/crates/karapace-schema/src/identity.rs
+++ b/crates/karapace-schema/src/identity.rs
@ -0,0 +1,195 @@
+use crate::normalize::NormalizedManifest;
+use crate::types::{EnvId, ShortId};
+use serde::Serialize;
+
+/// Deterministic identity for an environment, derived from its manifest content.
+///
+/// Both fields come from the same blake3 digest: `short_id` is the first
+/// 12 hex characters of `env_id`.
+#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
+pub struct EnvIdentity {
+    /// Full hex-encoded digest.
+    pub env_id: EnvId,
+    /// Leading 12 hex characters of `env_id`.
+    pub short_id: ShortId,
+}
+
+/// Derive a **preliminary** environment identity straight from a normalized,
+/// still unresolved manifest.
+///
+/// The result is NOT the canonical identity. The canonical one is produced by
+/// [`LockFile::compute_identity`](crate::lock::LockFile::compute_identity)
+/// after dependency resolution, and is based on:
+/// - the base image content digest (here only the tag name is hashed)
+/// - resolved package versions (here only package names are hashed)
+/// - the full hardware/mount/runtime policy
+///
+/// Intended uses:
+/// - the `init` command, which runs before any resolution
+/// - internal lookup during rebuild, to find old environments
+///
+/// After `build`, the env_id stored in metadata comes from the lock file.
+pub fn compute_env_id(normalized: &NormalizedManifest) -> EnvIdentity {
+    let mut state = blake3::Hasher::new();
+    // Every input is fed to the hasher as raw UTF-8 bytes, in a fixed order.
+    let mut absorb = |chunk: &str| {
+        state.update(chunk.as_bytes());
+    };
+
+    absorb(&normalized.canonical_json());
+
+    // Only the image *tag* is known at this point, so hash its name.
+    let tag_digest = blake3::hash(normalized.base_image.as_bytes())
+        .to_hex()
+        .to_string();
+    absorb(&tag_digest);
+
+    normalized
+        .system_packages
+        .iter()
+        .for_each(|pkg| absorb(&format!("pkg:{pkg}")));
+    normalized
+        .gui_apps
+        .iter()
+        .for_each(|app| absorb(&format!("app:{app}")));
+
+    if normalized.hardware_gpu {
+        absorb("hw:gpu");
+    }
+    if normalized.hardware_audio {
+        absorb("hw:audio");
+    }
+
+    for m in &normalized.mounts {
+        let entry = format!("mount:{}:{}:{}", m.label, m.host_path, m.container_path);
+        absorb(&entry);
+    }
+
+    absorb(&format!("backend:{}", normalized.runtime_backend));
+
+    if normalized.network_isolation {
+        absorb("net:isolated");
+    }
+    if let Some(shares) = normalized.cpu_shares {
+        absorb(&format!("cpu:{shares}"));
+    }
+    if let Some(limit) = normalized.memory_limit_mb {
+        absorb(&format!("mem:{limit}"));
+    }
+
+    let full = state.finalize().to_hex().to_string();
+    // The short id is a 12-character prefix of the full hex digest.
+    let prefix = full[..12].to_owned();
+
+    EnvIdentity {
+        env_id: EnvId::new(full),
+        short_id: ShortId::new(prefix),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::manifest::parse_manifest_str;
+
+    /// Parse and normalize an inline manifest, panicking on any failure.
+    fn normalized(src: &str) -> NormalizedManifest {
+        parse_manifest_str(src).unwrap().normalize().unwrap()
+    }
+
+    /// Package order in the manifest must not influence the identity.
+    #[test]
+    fn stable_id_for_equivalent_manifests() {
+        let git_first = normalized(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[system]
+packages = ["git", "clang"]
+"#,
+        );
+        let clang_first = normalized(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[system]
+packages = ["clang", "git"]
+"#,
+        );
+
+        assert_eq!(compute_env_id(&git_first), compute_env_id(&clang_first));
+    }
+
+    /// Adding a package must change the identity.
+    #[test]
+    fn different_inputs_produce_different_ids() {
+        let one_pkg = normalized(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[system]
+packages = ["git"]
+"#,
+        );
+        let two_pkgs = normalized(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[system]
+packages = ["git", "cmake"]
+"#,
+        );
+
+        assert_ne!(compute_env_id(&one_pkg), compute_env_id(&two_pkgs));
+    }
+
+    /// Switching the runtime backend must change the identity.
+    #[test]
+    fn backend_change_changes_id() {
+        let namespace = normalized(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[runtime]
+backend = "namespace"
+"#,
+        );
+        let oci = normalized(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[runtime]
+backend = "oci"
+"#,
+        );
+
+        assert_ne!(compute_env_id(&namespace), compute_env_id(&oci));
+    }
+
+    /// The short id is a 12-character prefix of the full id.
+    #[test]
+    fn short_id_is_12_chars() {
+        let minimal = normalized(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+"#,
+        );
+
+        let identity = compute_env_id(&minimal);
+        let short = identity.short_id.as_str();
+        assert_eq!(short.len(), 12);
+        assert!(identity.env_id.as_str().starts_with(short));
+    }
+}
--- a/crates/karapace-schema/src/lib.rs
+++ b/crates/karapace-schema/src/lib.rs
@ -0,0 +1,23 @@
+//! Manifest parsing, normalization, lock files, and environment identity for Karapace.
+//!
+//! This crate defines the schema layer: TOML manifest parsing (`ManifestV1`),
+//! normalized representations (`NormalizedManifest`), deterministic environment
+//! identity computation (`compute_env_id`), lock file generation/verification
+//! (`LockFile`), and built-in preset definitions.
+
+pub mod identity;
+pub mod lock;
+pub mod manifest;
+pub mod normalize;
+pub mod preset;
+pub mod types;
+
+pub use identity::{compute_env_id, EnvIdentity};
+pub use lock::{LockError, LockFile, ResolutionResult, ResolvedPackage};
+pub use manifest::{
+    parse_manifest_file, parse_manifest_str, BaseSection, GuiSection, HardwareSection,
+    ManifestError, ManifestV1, MountsSection, ResourceLimits, RuntimeSection, SystemSection,
+};
+pub use normalize::{NormalizedManifest, NormalizedMount};
+pub use preset::{get_preset, list_presets, Preset, BUILTIN_PRESETS};
+pub use types::{EnvId, LayerHash, ObjectHash, ShortId};
--- a/crates/karapace-schema/src/lock.rs
+++ b/crates/karapace-schema/src/lock.rs
@ -0,0 +1,874 @@
+use crate::identity::EnvIdentity;
+use crate::manifest::ManifestError;
+use crate::normalize::{NormalizedManifest, NormalizedMount};
+use serde::{Deserialize, Serialize};
+use std::fs;
+use std::path::Path;
+use thiserror::Error;
+
+/// Errors produced while reading, writing, or verifying a lock file.
+#[derive(Debug, Error)]
+pub enum LockError {
+    /// A manifest error, converted via `From<ManifestError>`.
+    #[error("manifest error: {0}")]
+    Manifest(#[from] ManifestError),
+    /// Filesystem failure while reading or writing the lock file.
+    #[error("lock file I/O error: {0}")]
+    Io(#[from] std::io::Error),
+    /// The lock file content could not be deserialized from TOML.
+    #[error("lock file parse error: {0}")]
+    Parse(#[from] toml::de::Error),
+    /// The lock file could not be serialized to TOML.
+    #[error("lock file serialize error: {0}")]
+    Serialize(#[from] toml::ser::Error),
+    /// The stored `env_id` differs from the one recomputed from the locked
+    /// fields (returned by `LockFile::verify_integrity`).
+    #[error("lock file env_id mismatch: lock has '{lock_id}', recomputed '{computed_id}'")]
+    EnvIdMismatch {
+        lock_id: String,
+        computed_id: String,
+    },
+    /// The manifest no longer matches the lock file (returned by
+    /// `LockFile::verify_manifest_intent`); the payload describes the drift.
+    #[error("lock file manifest drift: {0}")]
+    ManifestDrift(String),
+}
+
+/// A resolved package with pinned version.
+///
+/// The derived `Ord` compares fields in declaration order, i.e. by `name`
+/// first and `version` second; `LockFile::from_resolved` relies on it to put
+/// packages into a canonical order.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
+pub struct ResolvedPackage {
+    /// Package name.
+    pub name: String,
+    /// Exact version selected by resolution.
+    pub version: String,
+}
+
+/// Result of dependency resolution against a base image.
+///
+/// Consumed by `LockFile::from_resolved`, which copies both fields into the lock.
+#[derive(Debug, Clone)]
+pub struct ResolutionResult {
+    /// Content hash (blake3) of the base image rootfs tarball.
+    pub base_image_digest: String,
+    /// Resolved packages with pinned versions. Any order is accepted;
+    /// `LockFile::from_resolved` sorts them.
+    pub resolved_packages: Vec<ResolvedPackage>,
+}
+
+/// The lock file captures the fully resolved state of an environment.
+///
+/// The env_id is computed deterministically from the locked fields,
+/// not from unresolved manifest data. This guarantees:
+///   same lockfile → same env_id → same environment.
+///
+/// Fields marked `#[serde(default)]` may be absent from a serialized lock
+/// file; they then deserialize to an empty list / `None`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct LockFile {
+    /// Lock format version; `from_resolved` writes `2`.
+    pub lock_version: u32,
+    /// Full hex identity, as produced by `compute_identity`.
+    pub env_id: String,
+    /// First 12 hex characters of `env_id`.
+    pub short_id: String,
+
+    // Base image identity
+    /// Image name as declared in the manifest (not part of the identity hash).
+    pub base_image: String,
+    /// Content digest of the base image reported by resolution.
+    pub base_image_digest: String,
+
+    // Resolved dependencies (version-pinned)
+    /// Packages with pinned versions, sorted by `from_resolved`.
+    pub resolved_packages: Vec<ResolvedPackage>,
+    /// GUI apps copied from the normalized manifest.
+    pub resolved_apps: Vec<String>,
+
+    // Runtime policy (included in hash contract)
+    pub runtime_backend: String,
+    pub hardware_gpu: bool,
+    pub hardware_audio: bool,
+    pub network_isolation: bool,
+
+    // Mount policy
+    /// Mounts copied from the normalized manifest, in the order given there.
+    #[serde(default)]
+    pub mounts: Vec<NormalizedMount>,
+
+    // Resource limits
+    /// Optional CPU shares limit; hashed only when present.
+    #[serde(default)]
+    pub cpu_shares: Option<u64>,
+    /// Optional memory limit in MB; hashed only when present.
+    #[serde(default)]
+    pub memory_limit_mb: Option<u64>,
+}
+
+impl LockFile {
+    /// Build a lock file from a normalized manifest and its resolution result.
+    ///
+    /// Packages are sorted before hashing, so the order in which the resolver
+    /// reports them does not matter. The env_id is derived from the resolved
+    /// state, which means identical resolved dependencies always yield the
+    /// same identity.
+    pub fn from_resolved(normalized: &NormalizedManifest, resolution: &ResolutionResult) -> Self {
+        let mut pinned = resolution.resolved_packages.clone();
+        pinned.sort();
+
+        let mut lock = Self {
+            lock_version: 2,
+            // Placeholders; filled in below once the identity is known.
+            env_id: String::new(),
+            short_id: String::new(),
+            base_image: normalized.base_image.clone(),
+            base_image_digest: resolution.base_image_digest.clone(),
+            resolved_packages: pinned,
+            resolved_apps: normalized.gui_apps.clone(),
+            runtime_backend: normalized.runtime_backend.clone(),
+            hardware_gpu: normalized.hardware_gpu,
+            hardware_audio: normalized.hardware_audio,
+            network_isolation: normalized.network_isolation,
+            mounts: normalized.mounts.clone(),
+            cpu_shares: normalized.cpu_shares,
+            memory_limit_mb: normalized.memory_limit_mb,
+        };
+
+        // The identity hash does not read `env_id`/`short_id`, so it can be
+        // computed while they are still empty.
+        let EnvIdentity { env_id, short_id } = lock.compute_identity();
+        lock.env_id = env_id.into_inner();
+        lock.short_id = short_id.into_inner();
+        lock
+    }
+
+    /// Derive the canonical environment identity from the locked state.
+    ///
+    /// Only resolved, pinned data goes into the hash — never unresolved
+    /// package names or image tags. The stored `env_id`, `short_id`,
+    /// `base_image` and `lock_version` are not part of the input.
+    ///
+    /// The byte sequence fed to the hasher is a compatibility contract:
+    /// changing it changes the identity of every existing lock file.
+    pub fn compute_identity(&self) -> EnvIdentity {
+        use crate::types::{EnvId, ShortId};
+
+        let mut state = blake3::Hasher::new();
+        let mut absorb = |chunk: &str| {
+            state.update(chunk.as_bytes());
+        };
+
+        // Base image: content digest, not tag name.
+        absorb(&format!("base_digest:{}", self.base_image_digest));
+
+        // Pinned packages as name@version, in stored order
+        // (`from_resolved` stores them sorted).
+        self.resolved_packages
+            .iter()
+            .for_each(|p| absorb(&format!("pkg:{}@{}", p.name, p.version)));
+
+        // GUI apps, in stored order.
+        self.resolved_apps
+            .iter()
+            .for_each(|app| absorb(&format!("app:{app}")));
+
+        // Hardware policy: a marker is hashed only when the flag is set.
+        if self.hardware_gpu {
+            absorb("hw:gpu");
+        }
+        if self.hardware_audio {
+            absorb("hw:audio");
+        }
+
+        // Mount policy, in stored order.
+        for m in &self.mounts {
+            let entry = format!("mount:{}:{}:{}", m.label, m.host_path, m.container_path);
+            absorb(&entry);
+        }
+
+        // Runtime backend.
+        absorb(&format!("backend:{}", self.runtime_backend));
+
+        // Network isolation.
+        if self.network_isolation {
+            absorb("net:isolated");
+        }
+
+        // Resource limits: hashed only when configured.
+        if let Some(shares) = self.cpu_shares {
+            absorb(&format!("cpu:{shares}"));
+        }
+        if let Some(limit) = self.memory_limit_mb {
+            absorb(&format!("mem:{limit}"));
+        }
+
+        let full = state.finalize().to_hex().to_string();
+        let prefix = full[..12].to_owned();
+
+        EnvIdentity {
+            env_id: EnvId::new(full),
+            short_id: ShortId::new(prefix),
+        }
+    }
+
+    /// Check that the lock file is internally consistent, i.e. that the stored
+    /// env_id equals the env_id recomputed from the locked fields.
+    ///
+    /// Returns the recomputed identity on success and
+    /// [`LockError::EnvIdMismatch`] otherwise.
+    pub fn verify_integrity(&self) -> Result<EnvIdentity, LockError> {
+        let recomputed = self.compute_identity();
+        if self.env_id == recomputed.env_id.as_str() {
+            return Ok(recomputed);
+        }
+        Err(LockError::EnvIdMismatch {
+            lock_id: self.env_id.clone(),
+            computed_id: recomputed.env_id.into_inner(),
+        })
+    }
+
+    /// Check that a manifest's declared intent matches this lock file.
+    ///
+    /// This catches cases where the manifest changed but the lock wasn't updated.
+    /// Every manifest field that `from_resolved` copies into the lock is
+    /// compared: base image, runtime backend, declared packages (each must
+    /// appear in the lock; extra locked packages are allowed), hardware
+    /// policy, network isolation, GUI apps, mounts and resource limits.
+    ///
+    /// Returns [`LockError::ManifestDrift`] describing the first difference found.
+    pub fn verify_manifest_intent(&self, normalized: &NormalizedManifest) -> Result<(), LockError> {
+        if self.base_image != normalized.base_image {
+            return Err(LockError::ManifestDrift(format!(
+                "base image changed: lock has '{}', manifest has '{}'",
+                self.base_image, normalized.base_image
+            )));
+        }
+        if self.runtime_backend != normalized.runtime_backend {
+            return Err(LockError::ManifestDrift(format!(
+                "runtime backend changed: lock has '{}', manifest has '{}'",
+                self.runtime_backend, normalized.runtime_backend
+            )));
+        }
+
+        // Check that all declared packages are present in the lock.
+        // A set keeps each lookup O(1) instead of scanning the whole list.
+        let locked_names: std::collections::HashSet<&str> = self
+            .resolved_packages
+            .iter()
+            .map(|p| p.name.as_str())
+            .collect();
+        for pkg in &normalized.system_packages {
+            if !locked_names.contains(pkg.as_str()) {
+                return Err(LockError::ManifestDrift(format!(
+                    "package '{pkg}' is in manifest but not in lock file. Run 'karapace build' to re-resolve."
+                )));
+            }
+        }
+
+        if self.hardware_gpu != normalized.hardware_gpu
+            || self.hardware_audio != normalized.hardware_audio
+        {
+            return Err(LockError::ManifestDrift(
+                "hardware policy changed. Run 'karapace build' to re-resolve.".to_owned(),
+            ));
+        }
+
+        // The remaining policy fields are part of the identity hash as well,
+        // so a stale lock must not be accepted when any of them changed.
+        if self.network_isolation != normalized.network_isolation {
+            return Err(LockError::ManifestDrift(
+                "network isolation policy changed. Run 'karapace build' to re-resolve.".to_owned(),
+            ));
+        }
+        if self.resolved_apps != normalized.gui_apps {
+            return Err(LockError::ManifestDrift(
+                "GUI apps changed. Run 'karapace build' to re-resolve.".to_owned(),
+            ));
+        }
+        if self.mounts != normalized.mounts {
+            return Err(LockError::ManifestDrift(
+                "mount policy changed. Run 'karapace build' to re-resolve.".to_owned(),
+            ));
+        }
+        if self.cpu_shares != normalized.cpu_shares
+            || self.memory_limit_mb != normalized.memory_limit_mb
+        {
+            return Err(LockError::ManifestDrift(
+                "resource limits changed. Run 'karapace build' to re-resolve.".to_owned(),
+            ));
+        }
+
+        Ok(())
+    }
+
+    /// Serialize the lock file as pretty TOML and write it to `path` atomically.
+    ///
+    /// The content goes to a temporary file in the destination directory, is
+    /// fsynced, and is then renamed over `path`, so readers never observe a
+    /// partially written lock file.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LockError::Serialize`] if TOML serialization fails and
+    /// [`LockError::Io`] if creating, writing, syncing or renaming the
+    /// temporary file fails. Failure to fsync the parent directory is ignored
+    /// (best effort).
+    pub fn write_to_file(&self, path: impl AsRef<Path>) -> Result<(), LockError> {
+        let path = path.as_ref();
+        let content = toml::to_string_pretty(self)?;
+        // `Path::parent` yields `Some("")` for a bare file name such as
+        // "karapace.lock". An empty path cannot be opened, which would make
+        // the directory fsync below a silent no-op, so map it to ".".
+        let dir = path
+            .parent()
+            .filter(|p| !p.as_os_str().is_empty())
+            .unwrap_or(Path::new("."));
+        let mut tmp = tempfile::NamedTempFile::new_in(dir)?;
+        std::io::Write::write_all(&mut tmp, content.as_bytes())?;
+        tmp.as_file().sync_all()?;
+        tmp.persist(path).map_err(|e| LockError::Io(e.error))?;
+        // Fsync parent directory to ensure rename durability on power loss.
+        if let Ok(f) = fs::File::open(dir) {
+            let _ = f.sync_all();
+        }
+        Ok(())
+    }
+
+    /// Load a lock file from `path` and deserialize it from TOML.
+    ///
+    /// No integrity check is performed here; call `verify_integrity` on the result.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LockError::Io`] if the file cannot be read and
+    /// [`LockError::Parse`] if it is not a valid lock file.
+    pub fn read_from_file(path: impl AsRef<Path>) -> Result<Self, LockError> {
+        let raw = fs::read_to_string(path)?;
+        let lock: Self = toml::from_str(&raw)?;
+        Ok(lock)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::manifest::parse_manifest_str;
+
+    /// Normalized manifest shared by most tests: `rolling` base, git + clang.
+    fn sample_normalized() -> NormalizedManifest {
+        let manifest_src = r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[system]
+packages = ["git", "clang"]
+"#;
+        let manifest = parse_manifest_str(manifest_src).unwrap();
+        manifest.normalize().unwrap()
+    }
+
+    /// Resolution matching `sample_normalized`: a fixed digest and two pinned packages.
+    fn sample_resolution() -> ResolutionResult {
+        let pin = |name: &str, version: &str| ResolvedPackage {
+            name: name.to_owned(),
+            version: version.to_owned(),
+        };
+        ResolutionResult {
+            base_image_digest: "a".repeat(64),
+            resolved_packages: vec![pin("clang", "17.0.6-1"), pin("git", "2.44.0-1")],
+        }
+    }
+
+    /// Writing a lock file and reading it back yields an equal value.
+    #[test]
+    fn lock_roundtrip() {
+        let lock = LockFile::from_resolved(&sample_normalized(), &sample_resolution());
+
+        let scratch = tempfile::tempdir().unwrap();
+        let lock_path = scratch.path().join("karapace.lock");
+        lock.write_to_file(&lock_path).unwrap();
+
+        let loaded = LockFile::read_from_file(&lock_path).unwrap();
+        assert_eq!(lock, loaded);
+    }
+
+    /// A freshly generated lock file passes its own integrity check.
+    #[test]
+    fn lock_integrity_check_passes() {
+        let lock = LockFile::from_resolved(&sample_normalized(), &sample_resolution());
+        let outcome = lock.verify_integrity();
+        assert!(outcome.is_ok());
+    }
+
+    /// Overwriting the stored env_id must be caught by the integrity check.
+    #[test]
+    fn lock_integrity_fails_on_tamper() {
+        let tampered = LockFile {
+            env_id: "tampered".to_owned(),
+            ..LockFile::from_resolved(&sample_normalized(), &sample_resolution())
+        };
+        assert!(tampered.verify_integrity().is_err());
+    }
+
+    /// The lock stores the digest reported by resolution next to the image name.
+    #[test]
+    fn lock_contains_real_digest() {
+        let lock = LockFile::from_resolved(&sample_normalized(), &sample_resolution());
+        // Digest is the actual image digest, not a hash of the tag name.
+        let expected_digest = "a".repeat(64);
+        assert_eq!(lock.base_image_digest, expected_digest);
+        assert_eq!(lock.base_image, "rolling");
+    }
+
+    /// Resolved packages end up in the lock, sorted, with their pinned versions.
+    #[test]
+    fn lock_contains_pinned_versions() {
+        let lock = LockFile::from_resolved(&sample_normalized(), &sample_resolution());
+        let pinned: Vec<(&str, &str)> = lock
+            .resolved_packages
+            .iter()
+            .map(|p| (p.name.as_str(), p.version.as_str()))
+            .collect();
+        assert_eq!(pinned, [("clang", "17.0.6-1"), ("git", "2.44.0-1")]);
+    }
+
+    /// Generating a lock twice from the same inputs yields the same env_id.
+    #[test]
+    fn same_resolution_same_identity() {
+        let (normalized, resolution) = (sample_normalized(), sample_resolution());
+        let first = LockFile::from_resolved(&normalized, &resolution);
+        let second = LockFile::from_resolved(&normalized, &resolution);
+        assert_eq!(first.env_id, second.env_id);
+    }
+
+    /// Bumping a single pinned version must change the env_id.
+    #[test]
+    fn different_versions_different_identity() {
+        let normalized = sample_normalized();
+        let baseline = sample_resolution();
+        let mut bumped = sample_resolution();
+        bumped.resolved_packages[1].version = "2.45.0-1".to_owned();
+
+        let baseline_id = LockFile::from_resolved(&normalized, &baseline).env_id;
+        let bumped_id = LockFile::from_resolved(&normalized, &bumped).env_id;
+        assert_ne!(baseline_id, bumped_id);
+    }
+
+    /// A different base image digest must change the env_id.
+    #[test]
+    fn different_image_digest_different_identity() {
+        let normalized = sample_normalized();
+        let id_for_digest = |fill: &str| {
+            let mut resolution = sample_resolution();
+            resolution.base_image_digest = fill.repeat(64);
+            LockFile::from_resolved(&normalized, &resolution).env_id
+        };
+        assert_ne!(id_for_digest("a"), id_for_digest("b"));
+    }
+
+    /// A lock built from a manifest matches that same manifest's intent.
+    #[test]
+    fn manifest_intent_verified() {
+        let normalized = sample_normalized();
+        let lock = LockFile::from_resolved(&normalized, &sample_resolution());
+        let outcome = lock.verify_manifest_intent(&normalized);
+        assert!(outcome.is_ok());
+    }
+
+    /// Changing the base image after locking is reported as drift.
+    #[test]
+    fn manifest_drift_detected() {
+        let normalized = sample_normalized();
+        let lock = LockFile::from_resolved(&normalized, &sample_resolution());
+
+        // Same manifest, but pointing at a different base image.
+        let drifted = NormalizedManifest {
+            base_image: "ubuntu/24.04".to_owned(),
+            ..normalized.clone()
+        };
+        assert!(lock.verify_manifest_intent(&drifted).is_err());
+    }
+
+    /// Toggling GPU access must change the env_id.
+    #[test]
+    fn includes_hardware_policy_in_identity() {
+        let resolution = sample_resolution();
+        let id_with_gpu = |enabled: bool| {
+            let mut normalized = sample_normalized();
+            normalized.hardware_gpu = enabled;
+            LockFile::from_resolved(&normalized, &resolution).env_id
+        };
+        assert_ne!(id_with_gpu(false), id_with_gpu(true));
+    }
+
+    // --- A1: Determinism Hardening ---
+
+    /// One hundred lock generations from the same inputs all share one env_id.
+    #[test]
+    fn hash_stable_across_repeated_invocations() {
+        let normalized = sample_normalized();
+        let resolution = sample_resolution();
+        let ids: Vec<String> = (0..100)
+            .map(|_| LockFile::from_resolved(&normalized, &resolution).env_id)
+            .collect();
+        let first = &ids[0];
+        for (i, id) in ids.iter().enumerate() {
+            assert_eq!(first, id, "invocation {i} produced different env_id");
+        }
+    }
+
+    /// The order in which the resolver reports packages must not affect the env_id.
+    #[test]
+    fn hash_stable_with_randomized_package_order() {
+        let normalized = sample_normalized();
+        let pin = |name: &str, version: &str| ResolvedPackage {
+            name: name.to_owned(),
+            version: version.to_owned(),
+        };
+        // Same three packages, listed in two different orders.
+        let res_ab = ResolutionResult {
+            base_image_digest: "a".repeat(64),
+            resolved_packages: vec![pin("alpha", "1.0"), pin("beta", "2.0"), pin("gamma", "3.0")],
+        };
+        let res_ba = ResolutionResult {
+            base_image_digest: "a".repeat(64),
+            resolved_packages: vec![pin("gamma", "3.0"), pin("alpha", "1.0"), pin("beta", "2.0")],
+        };
+
+        let id_ab = LockFile::from_resolved(&normalized, &res_ab).env_id;
+        let id_ba = LockFile::from_resolved(&normalized, &res_ba).env_id;
+        assert_eq!(
+            id_ab, id_ba,
+            "package order must not affect env_id (sorted in from_resolved)"
+        );
+    }
+
+    /// Mount lists that are equal once sorted by label hash to the same env_id.
+    #[test]
+    fn hash_stable_with_randomized_mount_order() {
+        use crate::normalize::NormalizedMount;
+        let mount = |label: &str, host: &str, container: &str| NormalizedMount {
+            label: label.to_owned(),
+            host_path: host.to_owned(),
+            container_path: container.to_owned(),
+        };
+
+        let mut n1 = sample_normalized();
+        n1.mounts = vec![mount("cache", "/a", "/b"), mount("work", "/c", "/d")];
+        let mut n2 = sample_normalized();
+        n2.mounts = vec![mount("work", "/c", "/d"), mount("cache", "/a", "/b")];
+
+        // `from_resolved` hashes mounts in the order it receives them and does
+        // not re-sort; sorting by label is the caller's (normalize's) job.
+        // Emulate that step here, then check that the sorted lists hash equally.
+        for manifest in [&mut n1, &mut n2] {
+            manifest.mounts.sort_by(|a, b| a.label.cmp(&b.label));
+        }
+
+        let res = sample_resolution();
+        let id1 = LockFile::from_resolved(&n1, &res).env_id;
+        let id2 = LockFile::from_resolved(&n2, &res).env_id;
+        assert_eq!(id1, id2);
+    }
+
+    /// Mount paths are hashed as plain strings, so the env_id is a stable
+    /// 64-character hex value with no OS-dependent path handling involved.
+    #[test]
+    fn cross_platform_path_normalization() {
+        use crate::normalize::NormalizedMount;
+        let mut normalized = sample_normalized();
+        normalized.mounts = vec![NormalizedMount {
+            label: "src".to_owned(),
+            host_path: "/home/user/src".to_owned(),
+            container_path: "/workspace".to_owned(),
+        }];
+        let resolution = sample_resolution();
+
+        // Recomputing from the same inputs must reproduce the same id.
+        let first = LockFile::from_resolved(&normalized, &resolution);
+        let second = LockFile::from_resolved(&normalized, &resolution);
+        assert_eq!(first.env_id, second.env_id);
+
+        // env_id must be exactly 64 hex chars.
+        assert_eq!(first.env_id.len(), 64);
+        assert!(first.env_id.chars().all(|c| c.is_ascii_hexdigit()));
+    }
+
+    /// Identical inputs give byte-identical `env_id` and `short_id` strings.
+    #[test]
+    fn identical_inputs_produce_identical_hash_bytes() {
+        let (normalized, resolution) = (sample_normalized(), sample_resolution());
+        let first = LockFile::from_resolved(&normalized, &resolution);
+        let second = LockFile::from_resolved(&normalized, &resolution);
+        // Byte-level comparison of the full 64-char hex string.
+        assert_eq!(
+            first.env_id.as_bytes(),
+            second.env_id.as_bytes(),
+            "hash bytes must be identical for identical inputs"
+        );
+        assert_eq!(first.short_id.as_bytes(), second.short_id.as_bytes());
+    }
+
+    // --- IG-M5: Golden-value cross-machine determinism tests ---
+    //
+    // These tests hardcode expected blake3 hashes for fixed inputs.
+    // If any of these fail, it means compute_identity() has changed behavior,
+    // which would break cross-machine reproducibility and existing lock files.
+    // The golden values were computed once and must remain stable forever.
+
+    /// Build a lock for the golden tests with no GUI apps and no resource limits.
+    ///
+    /// Thin wrapper around `golden_lock_full`, so the fixture construction
+    /// lives in one place and the two helpers cannot drift apart.
+    fn golden_lock(
+        base_digest: &str,
+        packages: &[(&str, &str)],
+        mounts: &[(&str, &str, &str)],
+        backend: &str,
+        gpu: bool,
+        audio: bool,
+        network_isolation: bool,
+    ) -> LockFile {
+        golden_lock_full(
+            base_digest,
+            packages,
+            mounts,
+            &[],
+            backend,
+            gpu,
+            audio,
+            network_isolation,
+            None,
+            None,
+        )
+    }
+
+    /// Golden value: no packages, no mounts, `mock` backend, all flags off.
+    #[test]
+    fn golden_identity_empty_manifest() {
+        const EXPECTED: &str = "aabaeaeda3b27db42054f64719a16afd49e72b4fc6e8493e2fce9d862d240806";
+        let lock = golden_lock("sha256:abc123", &[], &[], "mock", false, false, false);
+        assert_eq!(
+            lock.env_id, EXPECTED,
+            "golden hash for empty manifest must be stable across all platforms"
+        );
+    }
+
+    /// Golden value: two pinned packages on the `namespace` backend.
+    #[test]
+    fn golden_identity_with_packages() {
+        const EXPECTED: &str = "dfea3163e5925ee788a97fae24d9ec08f774c29c64c9180befe771d877e62f18";
+        let packages = [("curl", "7.88.1"), ("git", "2.39.2")];
+        let lock = golden_lock(
+            "sha256:abc123",
+            &packages,
+            &[],
+            "namespace",
+            false,
+            false,
+            false,
+        );
+        assert_eq!(
+            lock.env_id, EXPECTED,
+            "golden hash for manifest with packages must be stable across all platforms"
+        );
+    }
+
+    /// Golden value: one package, one mount, GPU and audio enabled.
+    #[test]
+    fn golden_identity_with_mounts_and_hardware() {
+        const EXPECTED: &str = "d6ca89829da264240d0508bd58bffc28c2014f643426bbecff3db5a525793546";
+        let packages = [("vim", "9.0.1")];
+        let mounts = [("home", "/home/user", "/home")];
+        let lock = golden_lock(
+            "sha256:abc123",
+            &packages,
+            &mounts,
+            "namespace",
+            true,
+            true,
+            false,
+        );
+        assert_eq!(
+            lock.env_id, EXPECTED,
+            "golden hash for manifest with mounts+hardware must be stable across all platforms"
+        );
+    }
+
+    /// Golden value for the network-isolated empty manifest, which must also
+    /// differ from the golden value of the non-isolated empty manifest.
+    #[test]
+    fn golden_identity_network_isolation_differs() {
+        const ISOLATED: &str = "dcdae57b3749d0aa2d3948de9fde99ceedad34deaef9b618c2d9f939dac25596";
+        const NOT_ISOLATED: &str =
+            "aabaeaeda3b27db42054f64719a16afd49e72b4fc6e8493e2fce9d862d240806";
+        let lock = golden_lock("sha256:abc123", &[], &[], "mock", false, false, true);
+        assert_eq!(
+            lock.env_id, ISOLATED,
+            "golden hash for network-isolated manifest must be stable across all platforms"
+        );
+        // Must differ from the non-isolated empty manifest.
+        assert_ne!(
+            lock.env_id, NOT_ISOLATED,
+            "network isolation must produce a different hash"
+        );
+    }
+
+    /// Build a lock for the golden tests with every hashed input under the
+    /// caller's control. The base image name is always `rolling`.
+    // A test fixture builder: one argument per hashed input keeps call sites explicit.
+    #[allow(clippy::too_many_arguments)]
+    fn golden_lock_full(
+        base_digest: &str,
+        packages: &[(&str, &str)],
+        mounts: &[(&str, &str, &str)],
+        apps: &[&str],
+        backend: &str,
+        gpu: bool,
+        audio: bool,
+        network_isolation: bool,
+        cpu_shares: Option<u64>,
+        memory_limit_mb: Option<u64>,
+    ) -> LockFile {
+        // Manifest side: package names only, plus the policy flags.
+        let normalized = NormalizedManifest {
+            manifest_version: 1,
+            base_image: "rolling".to_owned(),
+            system_packages: packages
+                .iter()
+                .map(|&(name, _)| name.to_owned())
+                .collect(),
+            gui_apps: apps.iter().map(|&app| app.to_owned()).collect(),
+            hardware_gpu: gpu,
+            hardware_audio: audio,
+            mounts: mounts
+                .iter()
+                .map(|&(label, host, container)| NormalizedMount {
+                    label: label.to_owned(),
+                    host_path: host.to_owned(),
+                    container_path: container.to_owned(),
+                })
+                .collect(),
+            runtime_backend: backend.to_owned(),
+            network_isolation,
+            cpu_shares,
+            memory_limit_mb,
+        };
+        // Resolution side: the same packages with their pinned versions.
+        let resolution = ResolutionResult {
+            base_image_digest: base_digest.to_owned(),
+            resolved_packages: packages
+                .iter()
+                .map(|&(name, version)| ResolvedPackage {
+                    name: name.to_owned(),
+                    version: version.to_owned(),
+                })
+                .collect(),
+        };
+        LockFile::from_resolved(&normalized, &resolution)
+    }
+
+    /// Pins the environment id of an otherwise empty lock with `cpu_shares = 1024`.
+    #[test]
+    fn golden_identity_with_cpu_shares() {
+        const EXPECTED: &str = "d966f9ee1c5e8959ae29d0483c45fc66813ec47201aa9f26c6371336b3dfd252";
+
+        let cpu_shares = Some(1024);
+        let memory_limit_mb = None;
+        let lock = golden_lock_full(
+            "sha256:abc123",
+            &[], // packages
+            &[], // mounts
+            &[], // apps
+            "mock",
+            false, // gpu
+            false, // audio
+            false, // network_isolation
+            cpu_shares,
+            memory_limit_mb,
+        );
+
+        assert_eq!(
+            lock.env_id, EXPECTED,
+            "golden hash for cpu_shares=1024 must be stable across all platforms"
+        );
+    }
+
+    /// Pins the environment id of an otherwise empty lock with `memory_limit_mb = 4096`.
+    #[test]
+    fn golden_identity_with_memory_limit() {
+        const EXPECTED: &str = "74823889e305b7b28394508b5813568faf9c814b4ef8f1f97e8d3dcd9a7a6bae";
+
+        let cpu_shares = None;
+        let memory_limit_mb = Some(4096);
+        let lock = golden_lock_full(
+            "sha256:abc123",
+            &[], // packages
+            &[], // mounts
+            &[], // apps
+            "mock",
+            false, // gpu
+            false, // audio
+            false, // network_isolation
+            cpu_shares,
+            memory_limit_mb,
+        );
+
+        assert_eq!(
+            lock.env_id, EXPECTED,
+            "golden hash for memory_limit_mb=4096 must be stable across all platforms"
+        );
+    }
+
+    /// Pins the environment id of a lock whose only non-default input is the GUI
+    /// app list `["firefox", "code"]` (passed in that order).
+    #[test]
+    fn golden_identity_with_apps() {
+        const EXPECTED: &str = "1aaf066c7b1e18178e838b0cf33c0bc67cd7401e586df826daa9033178ccfdf3";
+
+        let apps = ["firefox", "code"];
+        let lock = golden_lock_full(
+            "sha256:abc123",
+            &[], // packages
+            &[], // mounts
+            &apps,
+            "mock",
+            false, // gpu
+            false, // audio
+            false, // network_isolation
+            None,  // cpu_shares
+            None,  // memory_limit_mb
+        );
+
+        assert_eq!(
+            lock.env_id, EXPECTED,
+            "golden hash for gui_apps=[firefox,code] must be stable across all platforms"
+        );
+    }
+
+    /// Pins the environment id of a lock in which every input is populated:
+    /// a package, a mount, an app, all three flags and both resource limits.
+    #[test]
+    fn golden_identity_with_cpu_and_memory() {
+        const EXPECTED: &str = "44f9547036b4f24f8fe32844f2672804020c6260e29b7f72e17fd29d441ebc27";
+
+        let packages = [("curl", "7.88.1")];
+        let mounts = [("data", "/mnt/data", "/data")];
+        let apps = ["vlc"];
+        let lock = golden_lock_full(
+            "sha256:abc123",
+            &packages,
+            &mounts,
+            &apps,
+            "namespace",
+            true, // gpu
+            true, // audio
+            true, // network_isolation
+            Some(2048),
+            Some(8192),
+        );
+
+        assert_eq!(
+            lock.env_id, EXPECTED,
+            "golden hash for fully-populated manifest must be stable across all platforms"
+        );
+    }
+
+    /// Pins the ids of a GPU-only and an audio-only lock and checks that the two
+    /// hardware flags are not interchangeable in the hash.
+    #[test]
+    fn golden_identity_gpu_only_differs_from_audio_only() {
+        const GPU_ONLY: &str = "f761765ba48777bcc64c2cd5169cb44be27bcd2d6587c64c28bc98fa0964b266";
+        const AUDIO_ONLY: &str = "428d91b41a03c1625e01bab1278ef231fb186833bff80a6bdc8227a2276f4318";
+
+        // Everything except the two hardware flags is identical between the locks.
+        let hardware_only = |gpu: bool, audio: bool| {
+            golden_lock_full(
+                "sha256:abc123",
+                &[],
+                &[],
+                &[],
+                "mock",
+                gpu,
+                audio,
+                false,
+                None,
+                None,
+            )
+        };
+        let gpu_lock = hardware_only(true, false);
+        let audio_lock = hardware_only(false, true);
+
+        assert_eq!(
+            gpu_lock.env_id, GPU_ONLY,
+            "golden hash for gpu-only must be stable"
+        );
+        assert_eq!(
+            audio_lock.env_id, AUDIO_ONLY,
+            "golden hash for audio-only must be stable"
+        );
+        assert_ne!(
+            gpu_lock.env_id, audio_lock.env_id,
+            "gpu-only and audio-only must produce different hashes"
+        );
+    }
+
+    /// Changes one manifest-side field at a time and checks that the resulting
+    /// `env_id` differs from the baseline built from `sample_normalized()` and
+    /// `sample_resolution()`. The assertion message names the field under test.
+    #[test]
+    fn hash_sensitive_to_all_fields() {
+        let base_norm = sample_normalized();
+        let base_res = sample_resolution();
+        let base_id = LockFile::from_resolved(&base_norm, &base_res).env_id;
+
+        // Change each field and verify the hash changes
+        let mut n = base_norm.clone();
+        n.network_isolation = !n.network_isolation;
+        assert_ne!(
+            LockFile::from_resolved(&n, &base_res).env_id,
+            base_id,
+            "network_isolation"
+        );
+
+        let mut n = base_norm.clone();
+        n.cpu_shares = Some(1024);
+        assert_ne!(
+            LockFile::from_resolved(&n, &base_res).env_id,
+            base_id,
+            "cpu_shares"
+        );
+
+        let mut n = base_norm.clone();
+        n.memory_limit_mb = Some(4096);
+        assert_ne!(
+            LockFile::from_resolved(&n, &base_res).env_id,
+            base_id,
+            "memory_limit_mb"
+        );
+
+        let mut n = base_norm.clone();
+        n.runtime_backend = "oci".to_owned();
+        assert_ne!(
+            LockFile::from_resolved(&n, &base_res).env_id,
+            base_id,
+            "runtime_backend"
+        );
+
+        let mut n = base_norm.clone();
+        n.gui_apps = vec!["new-app".to_owned()];
+        assert_ne!(
+            LockFile::from_resolved(&n, &base_res).env_id,
+            base_id,
+            "gui_apps"
+        );
+
+        // The hardware flags feed the id as well (see the gpu-only / audio-only
+        // golden hashes), so toggle each of them too.
+        let mut n = base_norm.clone();
+        n.hardware_gpu = !n.hardware_gpu;
+        assert_ne!(
+            LockFile::from_resolved(&n, &base_res).env_id,
+            base_id,
+            "hardware_gpu"
+        );
+
+        let mut n = base_norm.clone();
+        n.hardware_audio = !n.hardware_audio;
+        assert_ne!(
+            LockFile::from_resolved(&n, &base_res).env_id,
+            base_id,
+            "hardware_audio"
+        );
+    }
+}
--- a/crates/karapace-schema/src/manifest.rs
+++ b/crates/karapace-schema/src/manifest.rs
@ -0,0 +1,192 @@
+use serde::{Deserialize, Serialize};
+use std::collections::BTreeMap;
+use std::fs;
+use std::path::Path;
+use thiserror::Error;
+
+/// Errors raised while reading, parsing, or normalizing a manifest.
+///
+/// `Io` and `ParseToml` come from the parsing entry points in this module; the
+/// remaining variants are returned by `ManifestV1::normalize`.
+#[derive(Debug, Error)]
+pub enum ManifestError {
+    /// The manifest file could not be read.
+    #[error("failed to read manifest file: {0}")]
+    Io(#[from] std::io::Error),
+    /// The input is not valid TOML or does not deserialize into `ManifestV1`
+    /// (for example a missing `[base]` table or an unknown key).
+    #[error("failed to parse manifest: {0}")]
+    ParseToml(#[from] toml::de::Error),
+    /// `manifest_version` is not `1`; carries the rejected value.
+    #[error("unsupported manifest_version: {0}, expected 1")]
+    UnsupportedVersion(u32),
+    /// `base.image` is empty once surrounding whitespace is trimmed.
+    #[error("base.image must not be empty")]
+    EmptyBaseImage,
+    /// A key under `[mounts]` is empty once surrounding whitespace is trimmed.
+    #[error("mount label must not be empty")]
+    EmptyMountLabel,
+    /// A mount value has no `:` separator, or the host or container side is
+    /// empty after trimming.
+    #[error("invalid mount declaration for '{label}': '{spec}', expected '<host>:<container>'")]
+    InvalidMount { label: String, spec: String },
+}
+
+/// Raw, unvalidated contents of a version-1 manifest as written in TOML.
+///
+/// Only `manifest_version` and `[base]` are required; every other section falls
+/// back to its `Default` when absent. Unknown top-level keys are rejected.
+/// Semantic checks (version number, non-empty image, mount syntax) are not done
+/// here; they happen in `normalize`.
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct ManifestV1 {
+    /// Schema version; any `u32` parses, but `normalize` accepts only `1`.
+    pub manifest_version: u32,
+    /// Required `[base]` table naming the base image.
+    pub base: BaseSection,
+    /// Optional `[system]` table (package list).
+    #[serde(default)]
+    pub system: SystemSection,
+    /// Optional `[gui]` table (application list).
+    #[serde(default)]
+    pub gui: GuiSection,
+    /// Optional `[hardware]` table (GPU / audio flags).
+    #[serde(default)]
+    pub hardware: HardwareSection,
+    /// Optional `[mounts]` table mapping a label to a `<host>:<container>` spec.
+    #[serde(default)]
+    pub mounts: MountsSection,
+    /// Optional `[runtime]` table (backend, isolation, resource limits).
+    #[serde(default)]
+    pub runtime: RuntimeSection,
+}
+
+/// The required `[base]` table. Unknown keys are rejected.
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct BaseSection {
+    /// Base image name as written; `normalize` trims it and rejects an empty result.
+    pub image: String,
+}
+
+/// The optional `[system]` table. Unknown keys are rejected.
+#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct SystemSection {
+    /// Package names in the order written; empty when omitted. Sorting,
+    /// trimming and de-duplication happen later in `normalize`.
+    #[serde(default)]
+    pub packages: Vec<String>,
+}
+
+/// The optional `[gui]` table. Unknown keys are rejected.
+#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct GuiSection {
+    /// Application names in the order written; empty when omitted. Sorting,
+    /// trimming and de-duplication happen later in `normalize`.
+    #[serde(default)]
+    pub apps: Vec<String>,
+}
+
+/// The optional `[hardware]` table. Unknown keys are rejected.
+#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct HardwareSection {
+    /// GPU flag; `false` when omitted.
+    #[serde(default)]
+    pub gpu: bool,
+    /// Audio flag; `false` when omitted.
+    #[serde(default)]
+    pub audio: bool,
+}
+
+/// The optional `[mounts]` table.
+///
+/// Every key in the table is collected as a mount label and its string value as
+/// the raw mount spec; the spec is only split and validated in `normalize`.
+/// NOTE(review): unlike the sibling sections this one has no
+/// `deny_unknown_fields`, presumably because serde does not support it together
+/// with `flatten` — confirm.
+#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
+pub struct MountsSection {
+    /// Label -> raw spec, kept in key order by the `BTreeMap`.
+    #[serde(flatten)]
+    pub entries: BTreeMap<String, String>,
+}
+
+/// The optional `[runtime]` table. Unknown keys are rejected.
+///
+/// `Default` is implemented by hand so that an omitted table gets the same
+/// backend as an omitted `backend` key.
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct RuntimeSection {
+    /// Backend name as written; `default_backend()` supplies the value when
+    /// omitted. `normalize` trims and lower-cases it.
+    #[serde(default = "default_backend")]
+    pub backend: String,
+    /// Network isolation flag; `false` when omitted.
+    #[serde(default)]
+    pub network_isolation: bool,
+    /// Optional `[runtime.resource_limits]` sub-table; all limits unset when omitted.
+    #[serde(default)]
+    pub resource_limits: ResourceLimits,
+}
+
+impl Default for RuntimeSection {
+    /// Value used when the `[runtime]` table is absent: the default backend,
+    /// no network isolation, and no resource limits.
+    fn default() -> Self {
+        let backend = default_backend();
+        let resource_limits = ResourceLimits::default();
+        Self {
+            backend,
+            network_isolation: false,
+            resource_limits,
+        }
+    }
+}
+
+/// The optional `[runtime.resource_limits]` table. Unknown keys are rejected.
+///
+/// Both limits stay `None` when not specified; `normalize` copies them through
+/// unchanged. How a backend interprets the numbers is not visible in this crate.
+#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct ResourceLimits {
+    /// Requested CPU shares, if any.
+    #[serde(default)]
+    pub cpu_shares: Option<u64>,
+    /// Requested memory limit, if any (megabytes, going by the field name).
+    #[serde(default)]
+    pub memory_limit_mb: Option<u64>,
+}
+
+/// Backend name used when the manifest does not specify `runtime.backend`.
+fn default_backend() -> String {
+    String::from("namespace")
+}
+
+/// Deserialize a manifest from TOML text.
+///
+/// Only structural checks are performed (required keys, known keys, value
+/// types). Call `ManifestV1::normalize` on the result to validate the content.
+///
+/// # Errors
+///
+/// Returns `ManifestError::ParseToml` when the text is not valid TOML or does
+/// not match the manifest schema.
+pub fn parse_manifest_str(input: &str) -> Result<ManifestV1, ManifestError> {
+    toml::from_str::<ManifestV1>(input).map_err(ManifestError::from)
+}
+
+/// Read the file at `path` as UTF-8 text and parse it with [`parse_manifest_str`].
+///
+/// # Errors
+///
+/// Returns `ManifestError::Io` when the file cannot be read, otherwise whatever
+/// [`parse_manifest_str`] returns for its contents.
+pub fn parse_manifest_file(path: impl AsRef<Path>) -> Result<ManifestV1, ManifestError> {
+    match fs::read_to_string(path) {
+        Ok(text) => parse_manifest_str(&text),
+        Err(io_err) => Err(ManifestError::Io(io_err)),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Every section of a fully populated manifest must land in the matching field.
+    #[test]
+    fn parses_full_manifest() {
+        let input = r#"
+manifest_version = 1
+
+[base]
+image = "rolling"
+
+[system]
+packages = ["clang", "cmake", "git"]
+
+[gui]
+apps = ["ide", "debugger"]
+
+[hardware]
+gpu = true
+audio = true
+
+[mounts]
+workspace = "./:/workspace"
+
+[runtime]
+backend = "oci"
+network_isolation = true
+
+[runtime.resource_limits]
+cpu_shares = 1024
+memory_limit_mb = 4096
+"#;
+        let manifest = parse_manifest_str(input).expect("should parse");
+        assert_eq!(manifest.manifest_version, 1);
+        assert_eq!(manifest.base.image, "rolling");
+        // Parsing keeps the written order; sorting is the job of `normalize`.
+        assert_eq!(manifest.system.packages, ["clang", "cmake", "git"]);
+        assert_eq!(manifest.gui.apps, ["ide", "debugger"]);
+        assert!(manifest.hardware.gpu);
+        assert!(manifest.hardware.audio);
+        assert_eq!(manifest.mounts.entries.len(), 1);
+        assert_eq!(
+            manifest.mounts.entries.get("workspace").map(String::as_str),
+            Some("./:/workspace")
+        );
+        assert_eq!(manifest.runtime.backend, "oci");
+        assert!(manifest.runtime.network_isolation);
+        assert_eq!(manifest.runtime.resource_limits.cpu_shares, Some(1024));
+        assert_eq!(manifest.runtime.resource_limits.memory_limit_mb, Some(4096));
+    }
+
+    /// Omitted sections must fall back to their documented defaults.
+    #[test]
+    fn parses_minimal_manifest() {
+        let input = r#"
+manifest_version = 1
+
+[base]
+image = "rolling"
+"#;
+        let manifest = parse_manifest_str(input).expect("should parse");
+        assert_eq!(manifest.runtime.backend, "namespace");
+        assert!(!manifest.runtime.network_isolation);
+        assert_eq!(manifest.runtime.resource_limits, ResourceLimits::default());
+        assert!(manifest.system.packages.is_empty());
+        assert!(manifest.gui.apps.is_empty());
+        assert!(!manifest.hardware.gpu);
+        assert!(!manifest.hardware.audio);
+        assert!(manifest.mounts.entries.is_empty());
+    }
+
+    /// An unknown key inside `[base]` must be reported as a parse error.
+    #[test]
+    fn rejects_unknown_fields() {
+        let input = r#"
+manifest_version = 1
+
+[base]
+image = "rolling"
+unknown_field = true
+"#;
+        assert!(matches!(
+            parse_manifest_str(input),
+            Err(ManifestError::ParseToml(_))
+        ));
+    }
+
+    /// A manifest without the required `[base]` table must be reported as a parse error.
+    #[test]
+    fn rejects_missing_base() {
+        let input = r"
+manifest_version = 1
+";
+        assert!(matches!(
+            parse_manifest_str(input),
+            Err(ManifestError::ParseToml(_))
+        ));
+    }
+}
--- a/crates/karapace-schema/src/normalize.rs
+++ b/crates/karapace-schema/src/normalize.rs
@ -0,0 +1,224 @@
+use crate::manifest::{ManifestError, ManifestV1};
+use serde::{Deserialize, Serialize};
+
+/// Canonical, sorted, deduplicated representation of a parsed manifest.
+///
+/// Produced by `ManifestV1::normalize`: string fields are trimmed, package and
+/// app lists are sorted and deduplicated, and mounts are validated and sorted by
+/// label. This is the input to identity hashing and lock file generation.
+///
+/// `canonical_json` serializes this struct directly, so the set of fields
+/// declared here (and presumably their order — confirm before reordering) is
+/// part of the canonical form.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct NormalizedManifest {
+    /// Manifest schema version (`1` when produced by `normalize`).
+    pub manifest_version: u32,
+    /// Trimmed, non-empty base image name.
+    pub base_image: String,
+    /// Trimmed, sorted, deduplicated package names with empty entries removed.
+    pub system_packages: Vec<String>,
+    /// Trimmed, sorted, deduplicated GUI app names with empty entries removed.
+    pub gui_apps: Vec<String>,
+    /// Copied from `hardware.gpu`.
+    pub hardware_gpu: bool,
+    /// Copied from `hardware.audio`.
+    pub hardware_audio: bool,
+    /// Validated mounts, sorted by label.
+    pub mounts: Vec<NormalizedMount>,
+    /// Trimmed, lower-cased backend name.
+    pub runtime_backend: String,
+    /// Copied from `runtime.network_isolation`.
+    pub network_isolation: bool,
+    /// Copied from `runtime.resource_limits.cpu_shares`; `None` when unset.
+    pub cpu_shares: Option<u64>,
+    /// Copied from `runtime.resource_limits.memory_limit_mb`; `None` when unset.
+    pub memory_limit_mb: Option<u64>,
+}
+
+/// A validated bind-mount specification with label, host path, and container path.
+///
+/// All three strings are trimmed and non-empty when produced by
+/// `ManifestV1::normalize`. Paths are kept as written: nothing here expands
+/// `~`, resolves relative paths, or checks that they exist.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct NormalizedMount {
+    /// Trimmed key from the `[mounts]` table.
+    pub label: String,
+    /// Text before the first `:` of the mount spec, trimmed.
+    pub host_path: String,
+    /// Text after the first `:` of the mount spec, trimmed.
+    pub container_path: String,
+}
+
+impl ManifestV1 {
+    /// Validate the manifest and convert it into its canonical form.
+    ///
+    /// Checks run in this order: schema version, base image, then each mount in
+    /// label order (label first, spec second). The first failure is returned.
+    /// On success, strings are trimmed, package and app lists are sorted and
+    /// deduplicated, mounts are sorted by label, and the backend name is
+    /// lower-cased.
+    ///
+    /// # Errors
+    ///
+    /// * `UnsupportedVersion` — `manifest_version` is not `1`.
+    /// * `EmptyBaseImage` — `base.image` is blank.
+    /// * `EmptyMountLabel` — a mount label is blank.
+    /// * `InvalidMount` — a mount spec is not `<host>:<container>`.
+    pub fn normalize(&self) -> Result<NormalizedManifest, ManifestError> {
+        let version = self.manifest_version;
+        if version != 1 {
+            return Err(ManifestError::UnsupportedVersion(version));
+        }
+
+        let base_image = match self.base.image.trim() {
+            "" => return Err(ManifestError::EmptyBaseImage),
+            image => image.to_owned(),
+        };
+
+        let mut mounts = self
+            .mounts
+            .entries
+            .iter()
+            .map(|(label, spec)| -> Result<NormalizedMount, ManifestError> {
+                let clean_label = label.trim();
+                if clean_label.is_empty() {
+                    return Err(ManifestError::EmptyMountLabel);
+                }
+                // The untrimmed label is what ends up in an `InvalidMount` error.
+                let (host_path, container_path) = parse_mount_spec(label, spec)?;
+                Ok(NormalizedMount {
+                    label: clean_label.to_owned(),
+                    host_path,
+                    container_path,
+                })
+            })
+            .collect::<Result<Vec<_>, ManifestError>>()?;
+        mounts.sort_by(|lhs, rhs| lhs.label.cmp(&rhs.label));
+
+        let limits = &self.runtime.resource_limits;
+        Ok(NormalizedManifest {
+            manifest_version: version,
+            base_image,
+            system_packages: normalize_string_list(&self.system.packages),
+            gui_apps: normalize_string_list(&self.gui.apps),
+            hardware_gpu: self.hardware.gpu,
+            hardware_audio: self.hardware.audio,
+            mounts,
+            runtime_backend: self.runtime.backend.trim().to_lowercase(),
+            network_isolation: self.runtime.network_isolation,
+            cpu_shares: limits.cpu_shares,
+            memory_limit_mb: limits.memory_limit_mb,
+        })
+    }
+}
+
+impl NormalizedManifest {
+    /// Serialize the manifest as a single-line JSON string via `serde_json`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `serde_json` reports a serialization error, which the code
+    /// treats as impossible for this type.
+    pub fn canonical_json(&self) -> String {
+        let encoded = serde_json::to_string(self);
+        encoded.expect("normalized manifest serialization is infallible")
+    }
+}
+
+/// Split a `<host>:<container>` mount spec at its first `:` and trim both halves.
+///
+/// `label` is only used to build the error value.
+///
+/// # Errors
+///
+/// Returns `ManifestError::InvalidMount` (carrying `label` and `spec` as given)
+/// when `spec` contains no `:` or when either half is empty after trimming.
+fn parse_mount_spec(label: &str, spec: &str) -> Result<(String, String), ManifestError> {
+    // Both failure modes report the same error, so build it in one place.
+    let invalid = || ManifestError::InvalidMount {
+        label: label.to_owned(),
+        spec: spec.to_owned(),
+    };
+
+    let (host, container) = spec.split_once(':').ok_or_else(invalid)?;
+    let (host, container) = (host.trim(), container.trim());
+    if host.is_empty() || container.is_empty() {
+        return Err(invalid());
+    }
+
+    Ok((host.to_owned(), container.to_owned()))
+}
+
+/// Return the trimmed, non-empty entries of `values`, sorted ascending with
+/// duplicates removed.
+fn normalize_string_list(values: &[String]) -> Vec<String> {
+    let mut cleaned: Vec<String> = Vec::with_capacity(values.len());
+    for raw in values {
+        let entry = raw.trim();
+        if !entry.is_empty() {
+            cleaned.push(entry.to_owned());
+        }
+    }
+    // `dedup` only removes adjacent repeats, so the sort has to come first.
+    cleaned.sort();
+    cleaned.dedup();
+    cleaned
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::manifest::{parse_manifest_str, ManifestError};
+
+    /// Lists come out sorted and deduplicated, mounts sorted by label, and the
+    /// backend falls back to its default.
+    #[test]
+    fn normalizes_and_sorts_deterministically() {
+        let input = r#"
+manifest_version = 1
+
+[base]
+image = "rolling"
+
+[system]
+packages = ["git", "cmake", "git", "clang"]
+
+[gui]
+apps = ["debugger", "ide"]
+
+[hardware]
+gpu = true
+audio = false
+
+[mounts]
+workspace = "./:/workspace"
+cache = "~/.cache:/cache"
+"#;
+        let manifest = parse_manifest_str(input).unwrap();
+        let normalized = manifest.normalize().unwrap();
+
+        assert_eq!(normalized.system_packages, vec!["clang", "cmake", "git"]);
+        assert_eq!(normalized.gui_apps, vec!["debugger", "ide"]);
+        assert_eq!(normalized.mounts[0].label, "cache");
+        assert_eq!(normalized.mounts[1].label, "workspace");
+        assert_eq!(normalized.runtime_backend, "namespace");
+    }
+
+    /// Package order in the source manifest must not affect the canonical JSON.
+    #[test]
+    fn equivalent_manifests_produce_same_canonical_json() {
+        let a = parse_manifest_str(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[system]
+packages = ["git", "clang"]
+"#,
+        )
+        .unwrap()
+        .normalize()
+        .unwrap();
+
+        let b = parse_manifest_str(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[system]
+packages = ["clang", "git"]
+"#,
+        )
+        .unwrap()
+        .normalize()
+        .unwrap();
+
+        assert_eq!(a.canonical_json(), b.canonical_json());
+    }
+
+    /// A whitespace-only image name must fail with `EmptyBaseImage` specifically,
+    /// not with some unrelated error.
+    #[test]
+    fn rejects_empty_base_image() {
+        let manifest = parse_manifest_str(
+            r#"
+manifest_version = 1
+[base]
+image = "   "
+"#,
+        )
+        .unwrap();
+        assert!(matches!(
+            manifest.normalize(),
+            Err(ManifestError::EmptyBaseImage)
+        ));
+    }
+
+    /// A mount spec without a `:` separator must fail with `InvalidMount`.
+    #[test]
+    fn rejects_invalid_mount_spec() {
+        let manifest = parse_manifest_str(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[mounts]
+workspace = "./no-colon"
+"#,
+        )
+        .unwrap();
+        assert!(matches!(
+            manifest.normalize(),
+            Err(ManifestError::InvalidMount { .. })
+        ));
+    }
+
+    /// The backend name is carried into the normalized form, lower-cased.
+    #[test]
+    fn runtime_backend_included_in_normalization() {
+        let manifest = parse_manifest_str(
+            r#"
+manifest_version = 1
+[base]
+image = "rolling"
+[runtime]
+backend = "OCI"
+"#,
+        )
+        .unwrap();
+        let normalized = manifest.normalize().unwrap();
+        assert_eq!(normalized.runtime_backend, "oci");
+    }
+}
--- a/crates/karapace-schema/src/preset.rs
+++ b/crates/karapace-schema/src/preset.rs
@ -0,0 +1,143 @@
+use serde::{Deserialize, Serialize};
+
+/// A named, built-in manifest template.
+///
+/// All fields are `'static` string slices, which lets presets live in a `const`
+/// table.
+/// NOTE(review): with `&'static str` fields the derived `Deserialize` can
+/// presumably only borrow from `'static` input — confirm the derive is needed.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct Preset {
+    /// Lookup key used by `get_preset`.
+    pub name: &'static str,
+    /// One-line human-readable summary.
+    pub description: &'static str,
+    /// Complete manifest in TOML form.
+    pub manifest: &'static str,
+}
+
+/// The presets shipped with the crate: `dev`, `dev-rust`, `dev-python`,
+/// `gui-app`, `gaming` and `minimal`.
+///
+/// Every preset uses manifest version 1, the `rolling` base image and the
+/// `namespace` backend; they differ only in their package lists and hardware
+/// flags. The manifest text is stored verbatim and parsed on demand by callers.
+pub const BUILTIN_PRESETS: &[Preset] = &[
+    Preset {
+        name: "dev",
+        description: "Development environment with common build tools",
+        manifest: r#"manifest_version = 1
+
+[base]
+image = "rolling"
+
+[system]
+packages = ["git", "curl", "wget", "vim", "gcc", "make", "cmake"]
+
+[runtime]
+backend = "namespace"
+"#,
+    },
+    Preset {
+        name: "dev-rust",
+        description: "Rust development environment",
+        manifest: r#"manifest_version = 1
+
+[base]
+image = "rolling"
+
+[system]
+packages = ["git", "curl", "gcc", "make", "rustup"]
+
+[runtime]
+backend = "namespace"
+"#,
+    },
+    Preset {
+        name: "dev-python",
+        description: "Python development environment",
+        manifest: r#"manifest_version = 1
+
+[base]
+image = "rolling"
+
+[system]
+packages = ["git", "curl", "python3", "python3-pip", "python3-venv"]
+
+[runtime]
+backend = "namespace"
+"#,
+    },
+    Preset {
+        name: "gui-app",
+        description: "GUI application environment with GPU and audio passthrough",
+        manifest: r#"manifest_version = 1
+
+[base]
+image = "rolling"
+
+[hardware]
+gpu = true
+audio = true
+
+[runtime]
+backend = "namespace"
+"#,
+    },
+    Preset {
+        name: "gaming",
+        description: "Gaming environment with GPU, audio, and Vulkan support",
+        manifest: r#"manifest_version = 1
+
+[base]
+image = "rolling"
+
+[system]
+packages = ["mesa-dri", "vulkan-loader", "libvulkan1", "alsa-plugins"]
+
+[hardware]
+gpu = true
+audio = true
+
+[runtime]
+backend = "namespace"
+"#,
+    },
+    Preset {
+        name: "minimal",
+        description: "Minimal environment with no extra packages",
+        manifest: r#"manifest_version = 1
+
+[base]
+image = "rolling"
+
+[runtime]
+backend = "namespace"
+"#,
+    },
+];
+
+/// Look up a built-in preset by its exact, case-sensitive name.
+///
+/// Returns `None` when no preset in `BUILTIN_PRESETS` has that name.
+pub fn get_preset(name: &str) -> Option<&'static Preset> {
+    let is_wanted = |candidate: &&Preset| candidate.name == name;
+    BUILTIN_PRESETS.iter().find(is_wanted)
+}
+
+/// Return all built-in presets, in the order they are declared in
+/// `BUILTIN_PRESETS`.
+pub fn list_presets() -> &'static [Preset] {
+    BUILTIN_PRESETS
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Every preset must be valid TOML *and* pass manifest validation, so a
+    /// preset with a bad version, blank image or malformed mount is caught here.
+    #[test]
+    fn all_presets_parse() {
+        for preset in BUILTIN_PRESETS {
+            let result = crate::parse_manifest_str(preset.manifest)
+                .and_then(|manifest| manifest.normalize());
+            assert!(
+                result.is_ok(),
+                "preset '{}' failed to parse or normalize: {:?}",
+                preset.name,
+                result.err()
+            );
+        }
+    }
+
+    /// Lookup succeeds for a known name, returns that preset, and fails for an
+    /// unknown name.
+    #[test]
+    fn get_preset_by_name() {
+        assert_eq!(get_preset("dev").map(|preset| preset.name), Some("dev"));
+        assert!(get_preset("nonexistent").is_none());
+    }
+
+    /// Duplicate names would make all but the first unreachable via `get_preset`.
+    #[test]
+    fn all_presets_have_unique_names() {
+        let mut names: Vec<&str> = BUILTIN_PRESETS.iter().map(|p| p.name).collect();
+        names.sort_unstable();
+        names.dedup();
+        assert_eq!(names.len(), BUILTIN_PRESETS.len());
+    }
+}
--- a/crates/karapace-schema/src/types.rs
+++ b/crates/karapace-schema/src/types.rs
@ -0,0 +1,158 @@
+//! Newtype wrappers for string identifiers, providing compile-time type safety.
+//!
+//! All newtypes serialize/deserialize as plain strings for backward compatibility.
+
+use serde::{Deserialize, Serialize};
+use std::fmt;
+use std::ops::Deref;
+
+// Generates a public tuple struct wrapping a `String`, together with the
+// conversions and comparisons that let it stand in for a plain string:
+// `new` / `as_str` / `into_inner`, `From<String>` / `From<&str>`, `Deref<Target = str>`,
+// `Display`, `AsRef<str>` / `AsRef<Path>`, and equality against `str` and `String`.
+// Serde treats the type as a bare string (`#[serde(transparent)]`).
+// Attributes placed before the name (typically a doc comment) are forwarded to the struct.
+macro_rules! string_newtype {
+    ($(#[$meta:meta])* $name:ident) => {
+        $(#[$meta])*
+        #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
+        #[serde(transparent)]
+        pub struct $name(String);
+
+        impl $name {
+            /// Wrap a string-like value. The contents are stored as given, without validation.
+            pub fn new(s: impl Into<String>) -> Self {
+                let inner: String = s.into();
+                Self(inner)
+            }
+
+            /// Borrow the wrapped value as a string slice.
+            pub fn as_str(&self) -> &str {
+                self.0.as_str()
+            }
+
+            /// Give up the wrapper and return the owned `String`.
+            pub fn into_inner(self) -> String {
+                let Self(inner) = self;
+                inner
+            }
+        }
+
+        impl From<String> for $name {
+            fn from(s: String) -> Self {
+                $name(s)
+            }
+        }
+
+        impl From<&str> for $name {
+            fn from(s: &str) -> Self {
+                $name(String::from(s))
+            }
+        }
+
+        impl Deref for $name {
+            type Target = str;
+            fn deref(&self) -> &str {
+                self.0.as_str()
+            }
+        }
+
+        impl AsRef<str> for $name {
+            fn as_ref(&self) -> &str {
+                self.0.as_str()
+            }
+        }
+
+        impl AsRef<std::path::Path> for $name {
+            fn as_ref(&self) -> &std::path::Path {
+                std::path::Path::new(self.0.as_str())
+            }
+        }
+
+        impl fmt::Display for $name {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                // Written raw: width/padding flags on the formatter are not applied.
+                f.write_str(self.0.as_str())
+            }
+        }
+
+        impl PartialEq<str> for $name {
+            fn eq(&self, other: &str) -> bool {
+                self.0.as_str() == other
+            }
+        }
+
+        impl PartialEq<String> for $name {
+            fn eq(&self, other: &String) -> bool {
+                &self.0 == other
+            }
+        }
+
+        impl PartialEq<$name> for String {
+            fn eq(&self, other: &$name) -> bool {
+                self == &other.0
+            }
+        }
+    };
+}
+
+string_newtype!(
+    /// Full 64-character hex environment identifier, derived from locked manifest content.
+    ///
+    /// The length and hex format are a convention of the producers; the
+    /// constructors accept any string without checking it.
+    EnvId
+);
+
+string_newtype!(
+    /// Truncated 12-character prefix of an [`EnvId`], used for display.
+    ///
+    /// The constructors do not enforce the length or the prefix relationship.
+    ShortId
+);
+
+string_newtype!(
+    /// Blake3 hash of a content-addressable object in the store.
+    ///
+    /// Stored as an unvalidated string.
+    ObjectHash
+);
+
+string_newtype!(
+    /// Blake3 hash identifying a layer manifest.
+    ///
+    /// Stored as an unvalidated string.
+    LayerHash
+);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// `Display`, `as_str` and `AsRef<str>` all expose the wrapped text unchanged.
+    #[test]
+    fn env_id_display_and_as_ref() {
+        let id = EnvId::new("abc123");
+        let rendered = id.to_string();
+        assert_eq!(rendered, "abc123");
+        assert_eq!(id.as_str(), "abc123");
+        assert_eq!(AsRef::<str>::as_ref(&id), "abc123");
+    }
+
+    /// The newtype serializes as a bare JSON string and reads back equal.
+    #[test]
+    fn env_id_serde_roundtrip() {
+        let original = EnvId::new("deadbeef");
+        let encoded = serde_json::to_string(&original).unwrap();
+        assert_eq!(encoded, "\"deadbeef\"");
+        let decoded: EnvId = serde_json::from_str(&encoded).unwrap();
+        assert_eq!(decoded, original);
+    }
+
+    /// `From<&str>` keeps the text as given.
+    #[test]
+    fn short_id_from_str() {
+        let short: ShortId = "abc123def456".into();
+        assert_eq!(short.as_str(), "abc123def456");
+    }
+
+    /// `into_inner` hands back the owned string passed to `new`.
+    #[test]
+    fn object_hash_into_inner() {
+        let hash = ObjectHash::new(String::from("hash_value"));
+        let inner = hash.into_inner();
+        assert_eq!(inner, "hash_value");
+    }
+
+    /// Equality is by wrapped content.
+    #[test]
+    fn layer_hash_equality() {
+        let first = LayerHash::new("same");
+        let twin = LayerHash::new("same");
+        let other = LayerHash::new("diff");
+        assert_ne!(first, other);
+        assert_eq!(first, twin);
+    }
+
+    /// `From<String>` keeps the text as given.
+    #[test]
+    fn env_id_from_string() {
+        let id = EnvId::from(String::from("test_id"));
+        assert_eq!(id.as_str(), "test_id");
+    }
+}