feat: karapace-core — engine orchestration, lifecycle state machine, drift control

- Engine: init → resolve → lock → build → enter/exec → freeze → archive → destroy
- Cached store_root_str avoiding repeated to_string_lossy() allocations
- WAL-protected build, enter, exec, destroy, commit, restore, GC operations
- Overlay drift detection: diff/commit/export via upper_dir scanning
- Deterministic snapshot commit with composite identity hashing
- Atomic restore via staging directory swap
- StoreLock: compile-time enforcement via type parameter on gc()
- Signal handler: SIGINT/SIGTERM graceful shutdown with AtomicBool
- Push/pull delegation to karapace-remote backend
- Crash recovery: stale .running marker cleanup on Engine::new()
- Integration tests, E2E tests, crash injection tests, ENOSPC simulation
- Criterion benchmarks: build, rebuild, commit, restore, GC, verify
This commit is contained in:
Marco Allegretti 2026-02-22 18:37:02 +01:00
parent 8493831222
commit f535020600
13 changed files with 8329 additions and 0 deletions

View file

@ -0,0 +1,37 @@
# Crate manifest for karapace-core: engine orchestration, lifecycle state
# machine, and drift control for Karapace.
[package]
name = "karapace-core"
description = "Build engine, lifecycle state machine, drift control, and concurrency for Karapace"
# Inherited from the workspace root manifest.
version.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true

# Shared workspace lint configuration.
[lints]
workspace = true

[dependencies]
blake3.workspace = true
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
chrono.workspace = true
ctrlc.workspace = true
tracing.workspace = true
fs2.workspace = true
libc.workspace = true
# Sibling workspace crates this engine orchestrates.
karapace-schema = { path = "../karapace-schema" }
karapace-store = { path = "../karapace-store" }
karapace-runtime = { path = "../karapace-runtime" }
karapace-remote = { path = "../karapace-remote" }
# NOTE(review): tempfile is a regular dependency (the stress_test binary uses
# it at runtime), not a dev-dependency — confirm this is intentional.
tempfile.workspace = true

[dev-dependencies]
criterion.workspace = true

# Long-running leak/stress checker.
[[bin]]
name = "stress_test"
path = "src/bin/stress_test.rs"

[[bench]]
name = "engine_benchmarks"
harness = false

View file

@ -0,0 +1,202 @@
use criterion::{criterion_group, criterion_main, Criterion};
use std::fs;
use std::path::Path;
/// Write a minimal mock-backend manifest into `dir` and return its path.
fn create_test_manifest(dir: &Path) -> std::path::PathBuf {
    let path = dir.join("karapace.toml");
    let contents = r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "clang"]
[runtime]
backend = "mock"
"#;
    fs::write(&path, contents).unwrap();
    path
}
/// Benchmark a cold build of the two-package mock manifest.
fn bench_build(c: &mut Criterion) {
    c.bench_function("engine_build_mock_2pkg", |b| {
        b.iter_with_setup(
            || {
                // Fresh store + project per iteration so every build is cold.
                let store = tempfile::tempdir().unwrap();
                let project = tempfile::tempdir().unwrap();
                let manifest_path = create_test_manifest(project.path());
                let eng = karapace_core::Engine::new(store.path());
                (store, project, manifest_path, eng)
            },
            |(_store, _project, manifest_path, eng)| {
                eng.build(&manifest_path).unwrap();
            },
        );
    });
}
/// Benchmark rebuilding an environment whose manifest has not changed
/// (the setup performs the first build; the measured build is the rerun).
fn bench_rebuild_unchanged(c: &mut Criterion) {
    c.bench_function("engine_rebuild_unchanged", |b| {
        b.iter_with_setup(
            || {
                let store = tempfile::tempdir().unwrap();
                let project = tempfile::tempdir().unwrap();
                let manifest_path = create_test_manifest(project.path());
                let eng = karapace_core::Engine::new(store.path());
                let first = eng.build(&manifest_path).unwrap();
                (store, project, manifest_path, eng, first)
            },
            |(_store, _project, manifest_path, eng, _first)| {
                eng.build(&manifest_path).unwrap();
            },
        );
    });
}
/// Benchmark committing a snapshot after 100 drift files were written
/// into the environment's upper layer.
fn bench_commit(c: &mut Criterion) {
    c.bench_function("engine_commit_100files", |b| {
        b.iter_with_setup(
            || {
                let store = tempfile::tempdir().unwrap();
                let project = tempfile::tempdir().unwrap();
                let manifest_path = create_test_manifest(project.path());
                let eng = karapace_core::Engine::new(store.path());
                let built = eng.build(&manifest_path).unwrap();
                let env_id = built.identity.env_id.to_string();
                // Create 100 files in the upper directory to simulate drift
                let upper = store.path().join("env").join(&env_id).join("upper");
                fs::create_dir_all(&upper).unwrap();
                for n in 0..100 {
                    fs::write(
                        upper.join(format!("file_{n:03}.txt")),
                        format!("content {n}"),
                    )
                    .unwrap();
                }
                (store, project, eng, env_id)
            },
            |(_store, _project, eng, env_id)| {
                eng.commit(&env_id).unwrap();
            },
        );
    });
}
/// Benchmark restoring a previously committed 50-file snapshot.
fn bench_restore(c: &mut Criterion) {
    c.bench_function("engine_restore_snapshot", |b| {
        b.iter_with_setup(
            || {
                let store = tempfile::tempdir().unwrap();
                let project = tempfile::tempdir().unwrap();
                let manifest_path = create_test_manifest(project.path());
                let eng = karapace_core::Engine::new(store.path());
                let built = eng.build(&manifest_path).unwrap();
                let env_id = built.identity.env_id.to_string();
                // Create files and commit a snapshot
                let upper = store.path().join("env").join(&env_id).join("upper");
                fs::create_dir_all(&upper).unwrap();
                for n in 0..50 {
                    fs::write(
                        upper.join(format!("file_{n:03}.txt")),
                        format!("content {n}"),
                    )
                    .unwrap();
                }
                let snapshot = eng.commit(&env_id).unwrap();
                (store, project, eng, env_id, snapshot)
            },
            |(_store, _project, eng, env_id, snapshot)| {
                eng.restore(&env_id, &snapshot).unwrap();
            },
        );
    });
}
/// Benchmark a GC pass over a store seeded with 50 environments
/// (25 live, 25 dead by ref_count) plus 200 orphaned objects.
fn bench_gc(c: &mut Criterion) {
    c.bench_function("gc_50envs", |b| {
        b.iter_with_setup(
            || {
                let store_dir = tempfile::tempdir().unwrap();
                let layout = karapace_store::StoreLayout::new(store_dir.path());
                layout.initialize().unwrap();
                let meta_store = karapace_store::MetadataStore::new(layout.clone());
                let obj_store = karapace_store::ObjectStore::new(layout.clone());
                // Create 50 environments: 25 live (ref_count=1), 25 dead (ref_count=0)
                for i in 0..50 {
                    // Each env references one object via its manifest hash.
                    let obj_hash = obj_store.put(format!("obj-{i}").as_bytes()).unwrap();
                    let meta = karapace_store::EnvMetadata {
                        env_id: format!("env_{i:04}").into(),
                        short_id: format!("env_{i:04}").into(),
                        name: None,
                        state: karapace_store::EnvState::Built,
                        manifest_hash: obj_hash.into(),
                        base_layer: "".into(),
                        dependency_layers: vec![],
                        policy_layer: None,
                        created_at: "2026-01-01T00:00:00Z".to_owned(),
                        updated_at: "2026-01-01T00:00:00Z".to_owned(),
                        // First 25 entries are referenced (live); the rest are
                        // collectable (ref_count == 0).
                        ref_count: u32::from(i < 25),
                        checksum: None,
                    };
                    meta_store.put(&meta).unwrap();
                }
                // Create 200 orphan objects
                for i in 0..200 {
                    obj_store
                        .put(format!("orphan-object-{i}").as_bytes())
                        .unwrap();
                }
                (store_dir, layout)
            },
            |(_sd, layout)| {
                // Measured region: a full (non-dry-run) collection pass.
                let gc = karapace_store::GarbageCollector::new(layout);
                gc.collect(false).unwrap();
            },
        );
    });
}
/// Benchmark a full integrity scan over a store seeded with 200 objects.
fn bench_verify_store(c: &mut Criterion) {
    c.bench_function("verify_store_200objects", |b| {
        b.iter_with_setup(
            || {
                let store = tempfile::tempdir().unwrap();
                let layout = karapace_store::StoreLayout::new(store.path());
                layout.initialize().unwrap();
                let objects = karapace_store::ObjectStore::new(layout.clone());
                // Seed the store with 200 small objects to verify.
                for n in 0..200 {
                    objects
                        .put(format!("verify-object-{n}").as_bytes())
                        .unwrap();
                }
                (store, layout)
            },
            |(_store, layout)| {
                karapace_store::verify_store_integrity(&layout).unwrap();
            },
        );
    });
}
// Register every engine benchmark and generate the Criterion `main`
// (the [[bench]] target runs with `harness = false`).
criterion_group!(
    benches,
    bench_build,
    bench_rebuild_unchanged,
    bench_commit,
    bench_restore,
    bench_gc,
    bench_verify_store,
);
criterion_main!(benches);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,238 @@
//! Long-running stress test for the Karapace engine.
//!
//! Runs hundreds of build/commit/destroy/gc cycles with the mock backend,
//! checking for resource leaks (orphaned files, stale WAL entries, metadata
//! corruption) after every cycle.
//!
//! Usage:
//! cargo run --bin stress_test -- [--cycles N]
use karapace_core::Engine;
use karapace_store::{verify_store_integrity, GarbageCollector, StoreLayout};
use std::fs;
use std::path::Path;
use std::time::{Duration, Instant};
/// Write a mock-backend manifest to `dir/karapace.toml` and return its path.
fn write_manifest(dir: &Path) -> std::path::PathBuf {
    let path = dir.join("karapace.toml");
    let body = r#"manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["git", "clang"]
[runtime]
backend = "mock"
"#;
    fs::write(&path, body).expect("write manifest");
    path
}
/// Count the regular files directly inside `dir` (non-recursive).
/// A missing or unreadable directory counts as 0 — this is a best-effort
/// leak probe, not a hard check.
fn count_files_in(dir: &Path) -> usize {
    let Ok(entries) = fs::read_dir(dir) else {
        return 0;
    };
    entries
        .flatten()
        .filter(|entry| entry.file_type().map_or(false, |ft| ft.is_file()))
        .count()
}
/// Accumulated wall-clock totals for each engine operation across all cycles.
struct Timings {
    build: Duration,   // total time spent in Engine::build
    commit: Duration,  // total time spent in Engine::commit
    destroy: Duration, // total time spent in Engine::destroy
    gc: Duration,      // total time spent in GarbageCollector::collect
}
/// Run one build → commit → destroy cycle (plus a GC pass every 10th cycle),
/// accumulating per-operation wall time into `timings`.
///
/// Build, destroy, and GC failures abort the cycle with a formatted message;
/// a commit failure is only logged (commit is treated as best-effort here).
fn run_cycle(
    engine: &Engine,
    manifest_path: &Path,
    layout: &StoreLayout,
    cycle: usize,
    timings: &mut Timings,
) -> Result<(), String> {
    let t0 = Instant::now();
    let build_result = engine
        .build(manifest_path)
        .map_err(|e| format!("cycle {cycle}: BUILD FAILED: {e}"))?;
    timings.build += t0.elapsed();
    let env_id = build_result.identity.env_id.to_string();
    let t0 = Instant::now();
    if let Err(e) = engine.commit(&env_id) {
        eprintln!(" cycle {cycle}: COMMIT FAILED: {e}");
    }
    timings.commit += t0.elapsed();
    let t0 = Instant::now();
    engine
        .destroy(&env_id)
        .map_err(|e| format!("cycle {cycle}: DESTROY FAILED: {e}"))?;
    timings.destroy += t0.elapsed();
    // Periodic GC: every 10th cycle; its report is printed every 100th.
    if cycle.is_multiple_of(10) {
        let gc = GarbageCollector::new(layout.clone());
        let t0 = Instant::now();
        match gc.collect(false) {
            Ok(report) => {
                timings.gc += t0.elapsed();
                if cycle.is_multiple_of(100) {
                    println!(
                        " cycle {cycle}: GC collected {} objects, {} layers",
                        report.removed_objects, report.removed_layers
                    );
                }
            }
            Err(e) => return Err(format!("cycle {cycle}: GC FAILED: {e}")),
        }
    }
    Ok(())
}
/// Inspect store health after a batch of cycles: object integrity, stale WAL
/// entries, and leftover metadata. Returns the number of failures detected.
fn check_health(layout: &StoreLayout, wal_dir: &Path, cycle: usize) -> u64 {
    let mut failures = 0u64;
    match verify_store_integrity(layout) {
        Ok(report) => {
            if !report.failed.is_empty() {
                eprintln!(
                    " cycle {cycle}: INTEGRITY FAILURE: {} objects failed",
                    report.failed.len()
                );
                failures += 1;
            }
        }
        Err(e) => {
            eprintln!(" cycle {cycle}: INTEGRITY CHECK ERROR: {e}");
            failures += 1;
        }
    }
    // Any file still in the WAL directory is an operation that never cleaned
    // up after itself — counted as a failure.
    let wal_files = count_files_in(wal_dir);
    if wal_files > 0 {
        eprintln!(" cycle {cycle}: WAL LEAK: {wal_files} stale entries");
        failures += 1;
    }
    // NOTE(review): a metadata leak is reported but — unlike the WAL leak —
    // does not increment `failures`. Confirm this asymmetry is intentional.
    let meta_count = count_files_in(&layout.metadata_dir());
    if meta_count > 0 {
        eprintln!(" cycle {cycle}: METADATA LEAK: {meta_count} entries after full destroy+gc");
    }
    failures
}
/// Print the final summary (totals, averages, residual store contents,
/// integrity result) and exit with status 1 when any failure or WAL leak
/// remains.
///
/// Averages are guarded against `cycles == 0` (reachable via `--cycles 0`),
/// which previously printed NaN.
fn print_report(
    cycles: usize,
    failures: u64,
    timings: &Timings,
    layout: &StoreLayout,
    wal_dir: &Path,
) {
    let final_integrity = verify_store_integrity(layout);
    let wal_files = count_files_in(wal_dir);
    // Avoid division by zero in the per-cycle averages.
    let denom = cycles.max(1) as f64;
    println!();
    println!("============================================");
    println!("Results: {cycles} cycles, {failures} failures");
    println!(
        " build: {:.3}s total, {:.3}ms avg",
        timings.build.as_secs_f64(),
        timings.build.as_secs_f64() * 1000.0 / denom
    );
    println!(
        " commit: {:.3}s total, {:.3}ms avg",
        timings.commit.as_secs_f64(),
        timings.commit.as_secs_f64() * 1000.0 / denom
    );
    println!(
        " destroy: {:.3}s total, {:.3}ms avg",
        timings.destroy.as_secs_f64(),
        timings.destroy.as_secs_f64() * 1000.0 / denom
    );
    println!(" gc: {:.3}s total", timings.gc.as_secs_f64());
    println!(" WAL entries remaining: {wal_files}");
    println!(
        " metadata remaining: {}",
        count_files_in(&layout.metadata_dir())
    );
    println!(
        " objects remaining: {}",
        count_files_in(&layout.objects_dir())
    );
    println!(
        " layers remaining: {}",
        count_files_in(&layout.layers_dir())
    );
    match final_integrity {
        Ok(report) => println!(
            " integrity: {} checked, {} passed, {} failed",
            report.checked,
            report.passed,
            report.failed.len()
        ),
        Err(e) => println!(" integrity: ERROR: {e}"),
    }
    // Non-zero exit signals CI; stale WAL entries alone also count as failure.
    if failures > 0 || wal_files > 0 {
        eprintln!("\nSTRESS TEST FAILED");
        std::process::exit(1);
    } else {
        println!("\nSTRESS TEST PASSED");
    }
}
/// Entry point: parse `--cycles N` (default 500), run the cycle loop with
/// periodic health checks, then do a final GC and print the report.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    // Value following `--cycles`, when present and parseable; otherwise 500.
    let cycles: usize = args
        .iter()
        .position(|a| a == "--cycles")
        .and_then(|i| args.get(i + 1))
        .and_then(|s| s.parse().ok())
        .unwrap_or(500);
    println!("Karapace stress test: {cycles} cycles");
    println!("============================================");
    let store_dir = tempfile::tempdir().expect("create temp dir");
    let project_dir = tempfile::tempdir().expect("create project dir");
    let manifest_path = write_manifest(project_dir.path());
    let layout = StoreLayout::new(store_dir.path());
    layout.initialize().expect("initialize store");
    let engine = Engine::new(store_dir.path());
    // WAL directory under the store root; polled by check_health for leaks.
    let wal_dir = store_dir.path().join("store").join("wal");
    let mut timings = Timings {
        build: Duration::ZERO,
        commit: Duration::ZERO,
        destroy: Duration::ZERO,
        gc: Duration::ZERO,
    };
    let mut failures = 0u64;
    for cycle in 1..=cycles {
        if let Err(msg) = run_cycle(&engine, &manifest_path, &layout, cycle, &mut timings) {
            eprintln!(" {msg}");
            failures += 1;
            continue;
        }
        // Deep health check every 50 cycles; progress line every 100.
        if cycle.is_multiple_of(50) {
            failures += check_health(&layout, &wal_dir, cycle);
        }
        if cycle.is_multiple_of(100) {
            let elapsed = timings.build + timings.commit + timings.destroy + timings.gc;
            println!(
                " cycle {cycle}/{cycles}: {:.1}s elapsed, {failures} failures",
                elapsed.as_secs_f64()
            );
        }
    }
    // Final sweep so the report reflects a fully collected store.
    let gc = GarbageCollector::new(layout.clone());
    let _ = gc.collect(false);
    print_report(cycles, failures, &timings, &layout, &wal_dir);
}

View file

@ -0,0 +1,106 @@
use crate::CoreError;
use fs2::FileExt;
use std::fs::{File, OpenOptions};
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
/// Exclusive advisory lock over the store, held via `flock` on a lock file.
pub struct StoreLock {
    // Keeping the handle alive keeps the lock held; closing it releases it.
    lock_file: File,
}
impl StoreLock {
pub fn acquire(lock_path: &Path) -> Result<Self, CoreError> {
if let Some(parent) = lock_path.parent() {
std::fs::create_dir_all(parent)?;
}
let file = OpenOptions::new()
.create(true)
.write(true)
.truncate(false)
.open(lock_path)?;
file.lock_exclusive()
.map_err(|e| CoreError::Io(std::io::Error::new(std::io::ErrorKind::WouldBlock, e)))?;
Ok(Self { lock_file: file })
}
pub fn try_acquire(lock_path: &Path) -> Result<Option<Self>, CoreError> {
if let Some(parent) = lock_path.parent() {
std::fs::create_dir_all(parent)?;
}
let file = OpenOptions::new()
.create(true)
.write(true)
.truncate(false)
.open(lock_path)?;
match file.try_lock_exclusive() {
Ok(()) => Ok(Some(Self { lock_file: file })),
Err(_) => Ok(None),
}
}
}
impl Drop for StoreLock {
    fn drop(&mut self) {
        // Best-effort explicit unlock; the OS also releases the flock when
        // the file handle is closed (including on abrupt process death).
        let _ = self.lock_file.unlock();
    }
}
// Process-wide flag set by the signal handler and polled by long operations.
static SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);

/// Install a handler (via `ctrlc`) for graceful shutdown.
///
/// First signal: set the flag so the current operation can finish and exit
/// cleanly. Second signal: exit immediately with status 1. Handler
/// installation errors are ignored (best-effort).
pub fn install_signal_handler() {
    let _ = ctrlc::set_handler(move || {
        if SHUTDOWN_REQUESTED.load(Ordering::SeqCst) {
            // Second signal: the user really wants out now.
            std::process::exit(1);
        }
        SHUTDOWN_REQUESTED.store(true, Ordering::SeqCst);
        eprintln!("\nshutdown requested, finishing current operation...");
    });
}

/// True once a shutdown signal has been received.
pub fn shutdown_requested() -> bool {
    SHUTDOWN_REQUESTED.load(Ordering::SeqCst)
}
#[cfg(test)]
mod tests {
    use super::*;
    // Acquiring creates the lock file; the guard holds the lock for its scope.
    #[test]
    fn lock_acquire_and_release() {
        let dir = tempfile::tempdir().unwrap();
        let lock_path = dir.path().join("test.lock");
        {
            let _lock = StoreLock::acquire(&lock_path).unwrap();
            assert!(lock_path.exists());
        }
    }
    // While one guard holds the lock, try_acquire must report contention.
    #[test]
    fn try_acquire_returns_none_when_held() {
        let dir = tempfile::tempdir().unwrap();
        let lock_path = dir.path().join("test.lock");
        let _lock = StoreLock::acquire(&lock_path).unwrap();
        let result = StoreLock::try_acquire(&lock_path).unwrap();
        assert!(result.is_none());
    }
    // Dropping the guard releases the flock so a new acquire succeeds.
    #[test]
    fn lock_released_on_drop() {
        let dir = tempfile::tempdir().unwrap();
        let lock_path = dir.path().join("test.lock");
        {
            let _lock = StoreLock::acquire(&lock_path).unwrap();
        }
        let lock2 = StoreLock::try_acquire(&lock_path).unwrap();
        assert!(lock2.is_some());
    }
}

View file

@ -0,0 +1,277 @@
use crate::CoreError;
use karapace_store::StoreLayout;
use serde::Serialize;
use std::fs;
use std::path::Path;
/// Filename prefix marking a deletion in the upper layer: a file named
/// `.wh.<name>` means `<name>` was removed from the lower layer.
const WHITEOUT_PREFIX: &str = ".wh.";
/// Report of filesystem drift detected in an environment's overlay upper layer.
#[derive(Debug, Serialize)]
pub struct DriftReport {
    pub env_id: String,
    /// Relative paths present only in the upper layer (new files).
    pub added: Vec<String>,
    /// Relative paths present in both upper and lower layers (changed files).
    pub modified: Vec<String>,
    /// Relative paths deleted via whiteout markers.
    pub removed: Vec<String>,
    /// True when any of the three lists above is non-empty.
    pub has_drift: bool,
}
/// Walk the environment's overlay upper directory and classify every entry
/// as added, modified, or removed relative to the lower layer.
///
/// A missing upper directory yields an empty report. All three lists are
/// returned sorted for stable output.
pub fn diff_overlay(layout: &StoreLayout, env_id: &str) -> Result<DriftReport, CoreError> {
    let upper = layout.upper_dir(env_id);
    let lower = layout.env_path(env_id).join("lower");
    let mut added = Vec::new();
    let mut modified = Vec::new();
    let mut removed = Vec::new();
    if upper.exists() {
        collect_drift(
            &upper,
            &upper,
            &lower,
            &mut added,
            &mut modified,
            &mut removed,
        )?;
    }
    for list in [&mut added, &mut modified, &mut removed] {
        list.sort();
    }
    let has_drift = !(added.is_empty() && modified.is_empty() && removed.is_empty());
    Ok(DriftReport {
        env_id: env_id.to_owned(),
        added,
        modified,
        removed,
        has_drift,
    })
}
/// Recursively walk `current` (a directory under `upper_base`), classifying
/// each entry: whiteout markers go to `removed`, files that also exist under
/// `lower_base` go to `modified`, and files only in the upper layer go to
/// `added`. All recorded paths are relative to `upper_base`.
fn collect_drift(
    upper_base: &Path,
    current: &Path,
    lower_base: &Path,
    added: &mut Vec<String>,
    modified: &mut Vec<String>,
    removed: &mut Vec<String>,
) -> Result<(), CoreError> {
    if !current.is_dir() {
        return Ok(());
    }
    for entry in fs::read_dir(current)? {
        let entry = entry?;
        let path = entry.path();
        let file_name = entry.file_name();
        let name_str = file_name.to_string_lossy();
        // Path relative to the upper-layer root, used for reporting.
        let rel = path
            .strip_prefix(upper_base)
            .unwrap_or(&path)
            .to_string_lossy()
            .to_string();
        // Overlayfs whiteout files indicate deletion of the corresponding
        // file in the lower layer.
        if name_str.starts_with(WHITEOUT_PREFIX) {
            let deleted_name = name_str.strip_prefix(WHITEOUT_PREFIX).unwrap_or(&name_str);
            // Rebuild the deleted file's relative path from its parent dir
            // (an empty parent_rel means the marker sits at the upper root).
            let deleted_rel = if let Some(parent) = path.parent() {
                let parent_rel = parent
                    .strip_prefix(upper_base)
                    .unwrap_or(parent)
                    .to_string_lossy();
                if parent_rel.is_empty() {
                    deleted_name.to_string()
                } else {
                    format!("{parent_rel}/{deleted_name}")
                }
            } else {
                deleted_name.to_string()
            };
            removed.push(deleted_rel);
            continue;
        }
        if path.is_dir() {
            collect_drift(upper_base, &path, lower_base, added, modified, removed)?;
        } else {
            // If the same relative path exists in the lower layer,
            // this is a modification; otherwise it's a new file.
            let lower_path = lower_base.join(&rel);
            if lower_path.exists() {
                modified.push(rel);
            } else {
                added.push(rel);
            }
        }
    }
    Ok(())
}
/// Copy every file from the environment's upper layer into `dest`, creating
/// `dest` if needed. Returns the number of files copied; a missing upper
/// directory exports 0.
///
/// NOTE(review): whiteout markers (`.wh.*`) are exported verbatim like any
/// other file — confirm that is the intended export semantics.
pub fn export_overlay(layout: &StoreLayout, env_id: &str, dest: &Path) -> Result<usize, CoreError> {
    let upper = layout.upper_dir(env_id);
    if !upper.exists() {
        return Ok(0);
    }
    fs::create_dir_all(dest)?;
    let mut copied = 0;
    copy_recursive(&upper, dest, &mut copied)?;
    Ok(copied)
}
/// Recursively copy the tree rooted at `src` into `dst`, incrementing
/// `count` once per file copied. Destination directories are created as
/// they are encountered.
fn copy_recursive(src: &Path, dst: &Path, count: &mut usize) -> Result<(), CoreError> {
    for entry in fs::read_dir(src)? {
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            fs::create_dir_all(&to)?;
            copy_recursive(&from, &to, count)?;
        } else {
            fs::copy(&from, &to)?;
            *count += 1;
        }
    }
    Ok(())
}
/// Store every file in the environment's upper layer as a content-addressed
/// object, returning the resulting object hashes.
///
/// The returned list is sorted before being handed back: `fs::read_dir`
/// yields entries in a platform-dependent order, so an unsorted list would
/// make any identity derived from it (the deterministic snapshot commit)
/// vary between runs and filesystems.
pub fn commit_overlay(
    layout: &StoreLayout,
    env_id: &str,
    obj_store: &karapace_store::ObjectStore,
) -> Result<Vec<String>, CoreError> {
    let upper_dir = layout.upper_dir(env_id);
    if !upper_dir.exists() {
        return Ok(Vec::new());
    }
    let mut committed = Vec::new();
    commit_files(&upper_dir, obj_store, &mut committed)?;
    // Stable ordering regardless of directory iteration order.
    committed.sort();
    Ok(committed)
}
/// Recursively store every file under `current` in the object store,
/// appending each content hash to `committed` in directory-walk order.
fn commit_files(
    current: &Path,
    obj_store: &karapace_store::ObjectStore,
    committed: &mut Vec<String>,
) -> Result<(), CoreError> {
    if !current.is_dir() {
        return Ok(());
    }
    for entry in fs::read_dir(current)? {
        let entry = entry?;
        let path = entry.path();
        if path.is_dir() {
            commit_files(&path, obj_store, committed)?;
        } else {
            // NOTE(review): whiteout markers (.wh.*) are committed as
            // ordinary objects here — confirm snapshots should include them.
            let data = fs::read(&path)?;
            let hash = obj_store.put(&data)?;
            committed.push(hash);
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Create an initialized store layout inside a fresh temp dir.
    fn setup() -> (tempfile::TempDir, StoreLayout) {
        let dir = tempfile::tempdir().unwrap();
        let layout = StoreLayout::new(dir.path());
        layout.initialize().unwrap();
        (dir, layout)
    }
    // An environment with no upper directory at all must report zero drift.
    #[test]
    fn empty_overlay_reports_no_drift() {
        let (_dir, layout) = setup();
        let report = diff_overlay(&layout, "test-env").unwrap();
        assert!(!report.has_drift);
        assert!(report.added.is_empty());
    }
    // A plain file in the upper layer counts as "added".
    #[test]
    fn files_in_overlay_detected_as_drift() {
        let (_dir, layout) = setup();
        let upper = layout.upper_dir("test-env");
        fs::create_dir_all(&upper).unwrap();
        fs::write(upper.join("new_file.txt"), "content").unwrap();
        let report = diff_overlay(&layout, "test-env").unwrap();
        assert!(report.has_drift);
        assert_eq!(report.added.len(), 1);
        assert!(report.added.contains(&"new_file.txt".to_owned()));
    }
    // A whiteout marker must be reported as a removal, not an addition.
    #[test]
    fn whiteout_files_detected_as_removed() {
        let (_dir, layout) = setup();
        let upper = layout.upper_dir("test-env");
        fs::create_dir_all(&upper).unwrap();
        // Overlayfs whiteout: .wh.deleted_file marks "deleted_file" as removed
        fs::write(upper.join(".wh.deleted_file"), "").unwrap();
        let report = diff_overlay(&layout, "test-env").unwrap();
        assert!(report.has_drift);
        assert_eq!(report.removed.len(), 1);
        assert!(report.removed.contains(&"deleted_file".to_owned()));
        assert!(report.added.is_empty());
    }
    // Upper files shadowing a lower file are "modified"; upper-only files
    // are "added".
    #[test]
    fn modified_files_classified_correctly() {
        let (_dir, layout) = setup();
        // Create a "lower" layer with an existing file
        let env_dir = layout.env_path("test-env");
        let lower = env_dir.join("lower");
        fs::create_dir_all(&lower).unwrap();
        fs::write(lower.join("existing.txt"), "original").unwrap();
        // Same file in upper = modification
        let upper = layout.upper_dir("test-env");
        fs::create_dir_all(&upper).unwrap();
        fs::write(upper.join("existing.txt"), "changed").unwrap();
        // New file in upper only = added
        fs::write(upper.join("brand_new.txt"), "new").unwrap();
        let report = diff_overlay(&layout, "test-env").unwrap();
        assert!(report.has_drift);
        assert_eq!(report.modified.len(), 1);
        assert!(report.modified.contains(&"existing.txt".to_owned()));
        assert_eq!(report.added.len(), 1);
        assert!(report.added.contains(&"brand_new.txt".to_owned()));
    }
    // Export copies upper-layer files into the destination directory.
    #[test]
    fn export_copies_overlay_files() {
        let (_dir, layout) = setup();
        let upper = layout.upper_dir("test-env");
        fs::create_dir_all(&upper).unwrap();
        fs::write(upper.join("file.txt"), "data").unwrap();
        let export_dir = tempfile::tempdir().unwrap();
        let count = export_overlay(&layout, "test-env", export_dir.path()).unwrap();
        assert_eq!(count, 1);
        assert!(export_dir.path().join("file.txt").exists());
    }
    // Commit stores each upper-layer file as a retrievable object.
    #[test]
    fn commit_stores_overlay_as_objects() {
        let (_dir, layout) = setup();
        let upper = layout.upper_dir("test-env");
        fs::create_dir_all(&upper).unwrap();
        fs::write(upper.join("file.txt"), "data").unwrap();
        let obj_store = karapace_store::ObjectStore::new(layout.clone());
        let committed = commit_overlay(&layout, "test-env", &obj_store).unwrap();
        assert_eq!(committed.len(), 1);
        assert!(obj_store.exists(&committed[0]));
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,40 @@
//! Core orchestration engine for Karapace environment lifecycle.
//!
//! This crate ties together schema parsing, store operations, and runtime backends
//! into the `Engine` — the central API for building, entering, stopping, destroying,
//! and inspecting deterministic container environments. It also provides overlay
//! drift detection, concurrent store locking, and state-machine lifecycle validation.
pub mod concurrency;
pub mod drift;
pub mod engine;
pub mod lifecycle;
pub use concurrency::{install_signal_handler, shutdown_requested, StoreLock};
pub use drift::{commit_overlay, diff_overlay, export_overlay, DriftReport};
pub use engine::{BuildResult, Engine};
pub use lifecycle::validate_transition;
use thiserror::Error;
/// Unified error type for engine operations. Most variants wrap the error
/// domain of one of the crates the engine orchestrates, and `#[from]` lets
/// `?` convert them automatically.
#[derive(Debug, Error)]
pub enum CoreError {
    /// Manifest parsing/validation failure from karapace-schema.
    #[error("manifest error: {0}")]
    Manifest(#[from] karapace_schema::ManifestError),
    /// Lock-data failure from karapace-schema.
    #[error("lock error: {0}")]
    Lock(#[from] karapace_schema::LockError),
    /// Store-layer failure (objects, layers, metadata, WAL).
    #[error("store error: {0}")]
    Store(#[from] karapace_store::StoreError),
    /// Container runtime backend failure.
    #[error("runtime error: {0}")]
    Runtime(#[from] karapace_runtime::RuntimeError),
    /// The lifecycle state machine rejected the requested transition.
    #[error("invalid state transition: {from} -> {to}")]
    InvalidTransition { from: String, to: String },
    /// No environment with the given identifier exists.
    #[error("environment not found: {0}")]
    EnvNotFound(String),
    /// Underlying filesystem / OS error.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// JSON (de)serialization failure.
    #[error("serialization error: {0}")]
    Serialization(#[from] serde_json::Error),
    /// Remote push/pull backend failure.
    #[error("remote error: {0}")]
    Remote(#[from] karapace_remote::RemoteError),
}

View file

@ -0,0 +1,57 @@
use crate::CoreError;
use karapace_store::EnvState;
pub fn validate_transition(from: EnvState, to: EnvState) -> Result<(), CoreError> {
let valid = matches!(
(from, to),
(
EnvState::Defined
| EnvState::Built
| EnvState::Running
| EnvState::Frozen
| EnvState::Archived,
EnvState::Built
) | (
EnvState::Built,
EnvState::Running | EnvState::Frozen | EnvState::Archived
) | (EnvState::Running, EnvState::Frozen)
| (EnvState::Frozen, EnvState::Archived)
);
if valid {
Ok(())
} else {
Err(CoreError::InvalidTransition {
from: from.to_string(),
to: to.to_string(),
})
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Every edge the state machine permits, including idempotent rebuilds
    // and rebuilding from any state.
    #[test]
    fn valid_transitions() {
        assert!(validate_transition(EnvState::Defined, EnvState::Built).is_ok());
        assert!(validate_transition(EnvState::Built, EnvState::Built).is_ok()); // idempotent rebuild
        assert!(validate_transition(EnvState::Built, EnvState::Running).is_ok());
        assert!(validate_transition(EnvState::Built, EnvState::Frozen).is_ok());
        assert!(validate_transition(EnvState::Running, EnvState::Built).is_ok());
        assert!(validate_transition(EnvState::Running, EnvState::Frozen).is_ok());
        assert!(validate_transition(EnvState::Frozen, EnvState::Built).is_ok());
        assert!(validate_transition(EnvState::Built, EnvState::Archived).is_ok());
        assert!(validate_transition(EnvState::Frozen, EnvState::Archived).is_ok());
        assert!(validate_transition(EnvState::Archived, EnvState::Built).is_ok());
    }
    // Edges the state machine must reject, e.g. entering without a build or
    // resuming a frozen/archived environment without rebuilding first.
    #[test]
    fn invalid_transitions() {
        assert!(validate_transition(EnvState::Defined, EnvState::Running).is_err());
        assert!(validate_transition(EnvState::Defined, EnvState::Frozen).is_err());
        assert!(validate_transition(EnvState::Archived, EnvState::Running).is_err());
        assert!(validate_transition(EnvState::Running, EnvState::Defined).is_err());
        assert!(validate_transition(EnvState::Frozen, EnvState::Running).is_err());
    }
}

View file

@ -0,0 +1,391 @@
#![allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
//! Real crash recovery tests using fork + SIGKILL.
//!
//! These tests fork a child process that runs Karapace operations in a tight
//! loop, kill it mid-flight with SIGKILL, then verify the store is recoverable
//! and consistent in the parent.
//!
//! This validates that:
//! - WAL recovery cleans up incomplete operations
//! - No partially created environment directories survive
//! - No corrupted metadata remains
//! - Store integrity check passes after recovery
//! - Lock state is released (flock auto-released on process death)
use karapace_core::{Engine, StoreLock};
use karapace_store::StoreLayout;
use std::fs;
use std::path::Path;
/// Write `content` to `dir/karapace.toml` and return the manifest's path.
fn write_manifest(dir: &Path, content: &str) -> std::path::PathBuf {
    let manifest = dir.join("karapace.toml");
    fs::write(&manifest, content).unwrap();
    manifest
}
/// Render a mock-backend manifest whose `[system]` package list contains
/// exactly `packages` (each entry double-quoted, comma-separated).
fn mock_manifest(packages: &[&str]) -> String {
    let quoted: Vec<String> = packages.iter().map(|p| format!("\"{p}\"")).collect();
    format!(
        r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = [{}]
[runtime]
backend = "mock"
"#,
        quoted.join(", ")
    )
}
/// Verify that the store is in a consistent state after crash recovery.
///
/// Checks, in order: WAL is empty after recovery, object integrity passes,
/// all listed environments are inspectable and not stuck in Running, the
/// store lock is free, and no orphaned env dirs or `.running` markers remain.
fn verify_store_healthy(store_path: &Path) {
    // Creating a new Engine triggers WAL recovery
    let engine = Engine::new(store_path);
    let layout = StoreLayout::new(store_path);
    // WAL must be empty after recovery
    let wal = karapace_store::WriteAheadLog::new(&layout);
    let incomplete = wal.list_incomplete().unwrap();
    assert!(
        incomplete.is_empty(),
        "WAL must be clean after recovery, found {} incomplete entries",
        incomplete.len()
    );
    // Store integrity check must pass
    let report = karapace_store::verify_store_integrity(&layout).unwrap();
    assert!(
        report.failed.is_empty(),
        "store integrity check found {} failures: {:?}",
        report.failed.len(),
        report.failed
    );
    // All listed environments must be inspectable
    let envs = engine.list().unwrap();
    for env in &envs {
        let meta = engine.inspect(&env.env_id).unwrap();
        // No environment should be stuck in Running state after crash recovery
        // (WAL ResetState rollback should have fixed it)
        assert_ne!(
            meta.state,
            karapace_store::EnvState::Running,
            "env {} stuck in Running after crash recovery",
            env.env_id
        );
    }
    // Lock must be acquirable (proves the dead child released it)
    let lock = StoreLock::try_acquire(&layout.lock_file()).unwrap();
    assert!(
        lock.is_some(),
        "store lock must be acquirable after child death"
    );
    // No orphaned env directories (dirs in env/ without matching metadata)
    let env_base = layout.env_dir();
    if env_base.exists() {
        if let Ok(entries) = fs::read_dir(&env_base) {
            let meta_store = karapace_store::MetadataStore::new(layout.clone());
            for entry in entries.flatten() {
                let dir_name = entry.file_name();
                let dir_name_str = dir_name.to_string_lossy();
                // Skip dotfiles
                if dir_name_str.starts_with('.') {
                    continue;
                }
                // Every env dir should have matching metadata (or be cleaned up by WAL)
                // We don't assert this is always true because the build might have
                // completed successfully before the kill. But if metadata exists,
                // it should be readable.
                if meta_store.get(&dir_name_str).is_ok() {
                    // Metadata exists and is valid — good
                } else {
                    // Orphaned env dir — WAL should have cleaned it, but if the
                    // build completed and was killed before metadata write,
                    // this is acceptable as long as it doesn't cause errors
                }
            }
        }
    }
    // No stale .running markers
    if env_base.exists() {
        if let Ok(entries) = fs::read_dir(&env_base) {
            for entry in entries.flatten() {
                let running = entry.path().join(".running");
                assert!(
                    !running.exists(),
                    "stale .running marker at {}",
                    running.display()
                );
            }
        }
    }
}
/// Fork a child that runs `child_fn` in a loop, kill it after `delay`,
/// then verify store health in the parent.
///
/// # Safety
/// Uses `libc::fork()` which is inherently unsafe. The child must not
/// return — it either loops forever or exits.
unsafe fn crash_test(store_path: &Path, delay: std::time::Duration, child_fn: fn(&Path)) {
    let pid = libc::fork();
    assert!(pid >= 0, "fork() failed");
    if pid == 0 {
        // === CHILD PROCESS ===
        // Run the operation in a tight loop until killed
        child_fn(store_path);
        // If child_fn returns, exit immediately
        // (_exit skips normal Rust/libc teardown inherited from the parent).
        libc::_exit(0);
    }
    // === PARENT PROCESS ===
    std::thread::sleep(delay);
    // Send SIGKILL — no chance for cleanup
    // (works even if the child already exited: it stays a zombie until the
    // waitpid below reaps it).
    let ret = libc::kill(pid, libc::SIGKILL);
    assert_eq!(ret, 0, "kill() failed");
    // Wait for child to die
    let mut status: i32 = 0;
    let waited = libc::waitpid(pid, &raw mut status, 0);
    assert_eq!(waited, pid, "waitpid() failed");
    // Now verify the store
    verify_store_healthy(store_path);
}
/// Child body: rebuild environments forever, cycling the package count
/// (1..=4) so successive builds hit different layer combinations.
fn child_build_loop(store_path: &Path) {
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_path);
    for i in 0u64.. {
        let names: Vec<String> = (0..=(i % 4)).map(|j| format!("pkg{j}")).collect();
        let refs: Vec<&str> = names.iter().map(String::as_str).collect();
        let path = write_manifest(project.path(), &mock_manifest(&refs));
        let _ = engine.build(&path);
    }
}
/// Child body: alternate build and destroy forever so SIGKILL can land in
/// either operation, cycling the package count (1..=2).
fn child_build_destroy_loop(store_path: &Path) {
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_path);
    for i in 0u64.. {
        let names: Vec<String> = (0..=(i % 2)).map(|j| format!("pkg{j}")).collect();
        let refs: Vec<&str> = names.iter().map(String::as_str).collect();
        let path = write_manifest(project.path(), &mock_manifest(&refs));
        if let Ok(built) = engine.build(&path) {
            let _ = engine.destroy(&built.identity.env_id);
        }
    }
}
/// Child body: build once, then commit snapshots in a tight loop while
/// continuously writing drift files into the upper layer.
fn child_build_commit_loop(store_path: &Path) {
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_path);
    let manifest = write_manifest(project.path(), &mock_manifest(&["git"]));
    let Ok(built) = engine.build(&manifest) else {
        return;
    };
    let env_id = built.identity.env_id.to_string();
    let upper = store_path.join("env").join(&env_id).join("upper");
    let _ = fs::create_dir_all(&upper);
    for i in 0u64.. {
        let _ = fs::write(upper.join(format!("file_{i}.txt")), format!("data {i}"));
        let _ = engine.commit(&env_id);
    }
}
/// Child function: build + commit + restore in a tight loop
///
/// Commits an initial snapshot, then alternates commit and restore forever
/// so SIGKILL can land mid-commit or mid-restore.
fn child_commit_restore_loop(store_path: &Path) {
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_path);
    let manifest = write_manifest(project.path(), &mock_manifest(&["git"]));
    if let Ok(r) = engine.build(&manifest) {
        let env_id = r.identity.env_id.to_string();
        let upper = store_path.join("env").join(&env_id).join("upper");
        let _ = fs::create_dir_all(&upper);
        // Create initial snapshot
        let _ = fs::write(upper.join("base.txt"), "base content");
        if let Ok(snap_hash) = engine.commit(&env_id) {
            for i in 0u64.. {
                // Each cycle: add drift, commit it, then roll back to the
                // initial snapshot.
                let _ = fs::write(upper.join(format!("file_{i}.txt")), format!("data {i}"));
                let _ = engine.commit(&env_id);
                let _ = engine.restore(&env_id, &snap_hash);
            }
        }
    }
}
/// Child function: build + GC in a tight loop.
///
/// Builds five environments up front, then cycles forever: destroy one,
/// run GC under the store lock, and rebuild it. The crash harness kills
/// the process mid-operation, so every step is best-effort.
fn child_gc_loop(store_path: &Path) {
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_path);
    // Build several environments
    let mut env_ids = Vec::new();
    for i in 0..5 {
        let pkgs: Vec<String> = (0..=i).map(|j| format!("pkg{j}")).collect();
        let pkg_refs: Vec<&str> = pkgs.iter().map(String::as_str).collect();
        let manifest = write_manifest(project.path(), &mock_manifest(&pkg_refs));
        if let Ok(r) = engine.build(&manifest) {
            env_ids.push(r.identity.env_id.to_string());
        }
    }
    // If every initial build failed there is nothing to cycle over; bail
    // out rather than panicking on `i % 0` / out-of-bounds indexing below.
    if env_ids.is_empty() {
        return;
    }
    let layout = StoreLayout::new(store_path);
    for i in 0u64.. {
        // Destroy one environment per cycle
        let idx = (i as usize) % env_ids.len();
        let _ = engine.destroy(&env_ids[idx]);
        // Run GC; the lock is contended, so skip the cycle when unavailable.
        if let Ok(Some(lock)) = StoreLock::try_acquire(&layout.lock_file()) {
            let _ = engine.gc(&lock, false);
        }
        // Rebuild the destroyed environment with the same package set.
        let pkgs: Vec<String> = (0..=idx).map(|j| format!("pkg{j}")).collect();
        let pkg_refs: Vec<&str> = pkgs.iter().map(String::as_str).collect();
        let manifest = write_manifest(project.path(), &mock_manifest(&pkg_refs));
        if let Ok(r) = engine.build(&manifest) {
            env_ids[idx] = r.identity.env_id.to_string();
        }
    }
}
/// Child function: build + enter in a tight loop (tests ResetState WAL)
fn child_enter_loop(store_path: &Path) {
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_path);
    let manifest_path = write_manifest(project_dir.path(), &mock_manifest(&["git"]));
    let Ok(built) = engine.build(&manifest_path) else {
        return;
    };
    let env_id = built.identity.env_id.to_string();
    // Re-enter until the crash harness kills the process mid-enter.
    for _ in 0u64.. {
        let _ = engine.enter(&env_id);
    }
}
// --- Crash tests ---
// Each test runs with multiple delay values to increase the chance of hitting
// different points in the operation lifecycle.
#[test]
fn crash_during_build() {
    // Sweep several kill delays to interrupt the build at different phases.
    for delay_ms in [1u64, 5, 10, 20, 50] {
        let store = tempfile::tempdir().unwrap();
        // Pre-initialize the store so the child starts from a valid layout.
        StoreLayout::new(store.path()).initialize().unwrap();
        let delay = std::time::Duration::from_millis(delay_ms);
        // SAFETY: upholds crash_test's contract — see its declaration
        // earlier in this file.
        unsafe {
            crash_test(store.path(), delay, child_build_loop);
        }
    }
}
#[test]
fn crash_during_build_destroy() {
    // Sweep several kill delays to hit different points in build/destroy.
    for delay_ms in [1u64, 5, 10, 20, 50] {
        let store = tempfile::tempdir().unwrap();
        StoreLayout::new(store.path()).initialize().unwrap();
        let delay = std::time::Duration::from_millis(delay_ms);
        // SAFETY: upholds crash_test's contract — see its declaration
        // earlier in this file.
        unsafe {
            crash_test(store.path(), delay, child_build_destroy_loop);
        }
    }
}
#[test]
fn crash_during_commit() {
    // Sweep several kill delays to interrupt commits at different phases.
    for delay_ms in [1u64, 5, 10, 20, 50] {
        let store = tempfile::tempdir().unwrap();
        StoreLayout::new(store.path()).initialize().unwrap();
        let delay = std::time::Duration::from_millis(delay_ms);
        // SAFETY: upholds crash_test's contract — see its declaration
        // earlier in this file.
        unsafe {
            crash_test(store.path(), delay, child_build_commit_loop);
        }
    }
}
#[test]
fn crash_during_restore() {
    // Sweep several kill delays to interrupt commit/restore cycles.
    for delay_ms in [1u64, 5, 10, 20, 50] {
        let store = tempfile::tempdir().unwrap();
        StoreLayout::new(store.path()).initialize().unwrap();
        let delay = std::time::Duration::from_millis(delay_ms);
        // SAFETY: upholds crash_test's contract — see its declaration
        // earlier in this file.
        unsafe {
            crash_test(store.path(), delay, child_commit_restore_loop);
        }
    }
}
#[test]
fn crash_during_gc() {
    // GC cycles are longer, so the delay sweep starts higher (5–100ms).
    for delay_ms in [5u64, 10, 20, 50, 100] {
        let store = tempfile::tempdir().unwrap();
        StoreLayout::new(store.path()).initialize().unwrap();
        let delay = std::time::Duration::from_millis(delay_ms);
        // SAFETY: upholds crash_test's contract — see its declaration
        // earlier in this file.
        unsafe {
            crash_test(store.path(), delay, child_gc_loop);
        }
    }
}
#[test]
fn crash_during_enter() {
    // Sweep several kill delays to interrupt enter at different phases.
    for delay_ms in [1u64, 5, 10, 20, 50] {
        let store = tempfile::tempdir().unwrap();
        StoreLayout::new(store.path()).initialize().unwrap();
        let delay = std::time::Duration::from_millis(delay_ms);
        // SAFETY: upholds crash_test's contract — see its declaration
        // earlier in this file.
        unsafe {
            crash_test(store.path(), delay, child_enter_loop);
        }
    }
}

View file

@ -0,0 +1,630 @@
//! End-to-end tests that exercise the real namespace backend.
//!
//! These tests are `#[ignore]` by default because they require:
//! - Linux with user namespace support
//! - `fuse-overlayfs` installed
//! - `curl` installed
//! - Network access (to download base images)
//!
//! Run with: `cargo test --test e2e -- --ignored`
use karapace_core::Engine;
use karapace_store::{EnvState, StoreLayout};
use std::fs;
use std::path::Path;
/// Render a minimal TOML manifest for the namespace backend, with an
/// optional `[system]` packages section when `packages` is non-empty.
fn namespace_manifest(packages: &[&str]) -> String {
    let pkgs = if packages.is_empty() {
        String::new()
    } else {
        let quoted = packages
            .iter()
            .map(|p| format!("\"{p}\""))
            .collect::<Vec<_>>()
            .join(", ");
        format!("\n[system]\npackages = [{quoted}]\n")
    };
    format!(
        r#"manifest_version = 1
[base]
image = "rolling"
{pkgs}
[runtime]
backend = "namespace"
"#
    )
}
/// Write `content` to the canonical `karapace.toml` inside `dir` and
/// return the full path to the written manifest.
fn write_manifest(dir: &Path, content: &str) -> std::path::PathBuf {
    let manifest_path = dir.join("karapace.toml");
    fs::write(&manifest_path, content).unwrap();
    manifest_path
}
/// Report whether the namespace E2E prerequisites are present.
///
/// Outside CI a missing prerequisite merely skips the test; inside CI
/// (`CI` env var set) a silent skip would hide a broken job, so we
/// fail hard instead.
fn prereqs_available() -> bool {
    let missing = karapace_runtime::check_namespace_prereqs();
    if missing.is_empty() {
        return true;
    }
    let msg = karapace_runtime::format_missing(&missing);
    assert!(
        std::env::var("CI").is_err(),
        "CI FATAL: E2E prerequisites missing — tests cannot silently skip in CI.\n{msg}"
    );
    eprintln!("skipping E2E: missing prerequisites: {msg}");
    false
}
/// Build a minimal environment with the namespace backend (no packages).
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_build_minimal_namespace() {
    if !prereqs_available() {
        return;
    }
    let store = tempfile::tempdir().unwrap();
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store.path());
    let manifest = write_manifest(project.path(), &namespace_manifest(&[]));
    let result = engine.build(&manifest).unwrap();
    // A successful build must populate both identity fields.
    assert!(!result.identity.env_id.is_empty());
    assert!(!result.identity.short_id.is_empty());
    // A fresh build lands in the Built state.
    let meta = engine.inspect(&result.identity.env_id).unwrap();
    assert_eq!(meta.state, EnvState::Built);
    // Verify the environment directory was created
    let layout = StoreLayout::new(store.path());
    assert!(layout.env_path(&result.identity.env_id).exists());
    // Lock file was written next to the manifest in the project dir
    assert!(project.path().join("karapace.lock").exists());
}
/// Exec a command inside a built environment and verify stdout.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_exec_in_namespace() {
    if !prereqs_available() {
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path = write_manifest(project_dir.path(), &namespace_manifest(&[]));
    let built = engine.build(&manifest_path).unwrap();
    // exec() writes to stdout/stderr directly; a clean return is the
    // only assertion we can make here.
    let command: Vec<String> = ["echo", "hello"].iter().map(|s| (*s).to_owned()).collect();
    engine.exec(&built.identity.env_id, &command).unwrap();
}
/// Destroy cleans up all overlay directories.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_destroy_cleans_up() {
    if !prereqs_available() {
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path = write_manifest(project_dir.path(), &namespace_manifest(&[]));
    let env_id = engine.build(&manifest_path).unwrap().identity.env_id;
    let layout = StoreLayout::new(store_dir.path());
    // The environment directory exists right after a successful build...
    assert!(layout.env_path(&env_id).exists());
    engine.destroy(&env_id).unwrap();
    // ...and destroy must remove it entirely.
    assert!(!layout.env_path(&env_id).exists());
}
/// Rebuild produces the same env_id for the same manifest.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_rebuild_determinism() {
    if !prereqs_available() {
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path = write_manifest(project_dir.path(), &namespace_manifest(&[]));
    let first = engine.build(&manifest_path).unwrap();
    let second = engine.rebuild(&manifest_path).unwrap();
    // Identity is derived from the manifest, so it must be stable.
    assert_eq!(
        first.identity.env_id, second.identity.env_id,
        "rebuild must produce the same env_id"
    );
}
/// Snapshot and restore round-trip with real namespace backend.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_snapshot_and_restore() {
    if !prereqs_available() {
        return;
    }
    let store = tempfile::tempdir().unwrap();
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store.path());
    let manifest = write_manifest(project.path(), &namespace_manifest(&[]));
    let result = engine.build(&manifest).unwrap();
    let env_id = result.identity.env_id.clone();
    // Write a file to the upper dir (simulating user modifications)
    let upper = StoreLayout::new(store.path()).upper_dir(&env_id);
    if upper.exists() {
        // Clear build artifacts first so the snapshot only contains
        // the baseline file written below.
        let _ = fs::remove_dir_all(&upper);
    }
    fs::create_dir_all(&upper).unwrap();
    fs::write(upper.join("user_data.txt"), "snapshot baseline").unwrap();
    // Commit a snapshot
    let snapshot_hash = engine.commit(&env_id).unwrap();
    assert!(!snapshot_hash.is_empty());
    // Verify snapshot is listed
    let snapshots = engine.list_snapshots(&env_id).unwrap();
    assert_eq!(snapshots.len(), 1);
    // Mutate upper dir after snapshot: modify one file, add another.
    fs::write(upper.join("user_data.txt"), "MODIFIED").unwrap();
    fs::write(upper.join("extra.txt"), "should disappear").unwrap();
    // Restore from snapshot
    engine.restore(&env_id, &snapshot_hash).unwrap();
    // Verify restore worked: the mutation is rolled back...
    assert_eq!(
        fs::read_to_string(upper.join("user_data.txt")).unwrap(),
        "snapshot baseline"
    );
    // ...and the file created after the snapshot is removed.
    assert!(
        !upper.join("extra.txt").exists(),
        "extra file must be gone after restore"
    );
}
/// Overlay correctness: files written in upper are visible, base is read-only.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_overlay_file_visibility() {
    if !prereqs_available() {
        return;
    }
    let store = tempfile::tempdir().unwrap();
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store.path());
    let manifest = write_manifest(project.path(), &namespace_manifest(&[]));
    let result = engine.build(&manifest).unwrap();
    let env_id = result.identity.env_id.clone();
    let layout = StoreLayout::new(store.path());
    let upper = layout.upper_dir(&env_id);
    fs::create_dir_all(&upper).unwrap();
    // Write a file in upper — should be visible via exec
    fs::write(upper.join("test_marker.txt"), "visible").unwrap();
    // exec `cat /test_marker.txt` should succeed (file visible through overlay)
    // NOTE(review): assumes the upper dir maps to the container root `/`;
    // confirm against the runtime's overlay mount layout.
    let cmd = vec!["cat".to_owned(), "/test_marker.txt".to_owned()];
    let result = engine.exec(&env_id, &cmd);
    // If overlay is correctly mounted, the file is visible
    assert!(
        result.is_ok(),
        "files in upper dir must be visible through overlay"
    );
}
/// Enter/exit cycle: repeated enter should not leak state.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_enter_exit_cycle() {
    if !prereqs_available() {
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path = write_manifest(project_dir.path(), &namespace_manifest(&[]));
    let env_id = engine.build(&manifest_path).unwrap().identity.env_id;
    // Run exec 20 times — leaked state would surface as an error here
    // or as a state transition away from Built below.
    for cycle in 0..20 {
        engine
            .exec(&env_id, &["echo".to_owned(), format!("cycle-{cycle}")])
            .unwrap();
    }
    let meta = engine.inspect(&env_id).unwrap();
    assert_eq!(
        meta.state,
        EnvState::Built,
        "env must be Built after enter/exit cycles"
    );
}
// --- IG-M1: Real Runtime Backend Validation ---
/// Verify no fuse-overlayfs mounts leak after build + exec + destroy cycle.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_mount_leak_detection() {
    if !prereqs_available() {
        return;
    }
    // Count fuse-overlayfs entries in /proc/mounts at a point in time.
    let count_fuse_mounts = || {
        fs::read_to_string("/proc/mounts")
            .unwrap_or_default()
            .lines()
            .filter(|line| line.contains("fuse-overlayfs"))
            .count()
    };
    let fuse_before = count_fuse_mounts();
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path = write_manifest(project_dir.path(), &namespace_manifest(&[]));
    let env_id = engine.build(&manifest_path).unwrap().identity.env_id;
    // Exec inside, then tear the environment down.
    engine
        .exec(&env_id, &["echo".to_owned(), "leak-test".to_owned()])
        .unwrap();
    engine.destroy(&env_id).unwrap();
    let fuse_after = count_fuse_mounts();
    assert_eq!(
        fuse_before, fuse_after,
        "fuse-overlayfs mount count must not change after build+exec+destroy: before={fuse_before}, after={fuse_after}"
    );
}
/// Repeated build/destroy cycles must not accumulate state or stale mounts.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_build_destroy_20_cycles() {
    if !prereqs_available() {
        return;
    }
    let store = tempfile::tempdir().unwrap();
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store.path());
    let layout = StoreLayout::new(store.path());
    for i in 0..20 {
        let manifest = write_manifest(project.path(), &namespace_manifest(&[]));
        let result = engine.build(&manifest).unwrap();
        let env_id = result.identity.env_id.clone();
        engine.destroy(&env_id).unwrap();
        // Each cycle must leave no trace of the destroyed environment.
        assert!(
            !layout.env_path(&env_id).exists(),
            "env dir must be gone after destroy in cycle {i}"
        );
    }
    // Final integrity check over the whole store after all cycles.
    let report = karapace_store::verify_store_integrity(&layout).unwrap();
    assert!(
        report.failed.is_empty(),
        "store integrity must pass after 20 build/destroy cycles: {:?}",
        report.failed
    );
    // No stale overlays: look for fuse-overlayfs entries in /proc/mounts
    // that reference this test's store path.
    let mounts = fs::read_to_string("/proc/mounts").unwrap_or_default();
    let store_path_str = store.path().to_string_lossy();
    let stale: Vec<&str> = mounts
        .lines()
        .filter(|l| l.contains("fuse-overlayfs") && l.contains(store_path_str.as_ref()))
        .collect();
    assert!(
        stale.is_empty(),
        "no stale overlayfs mounts after 20 cycles: {stale:?}"
    );
}
/// If an OCI runtime (crun/runc) is available, build and destroy with it.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_oci_build_if_available() {
    if !prereqs_available() {
        return;
    }
    // Probe for an OCI runtime binary on PATH, preferring crun.
    let has_oci = ["crun", "runc"].iter().any(|rt| {
        std::process::Command::new("which")
            .arg(rt)
            .output()
            .map(|o| o.status.success())
            .unwrap_or(false)
    });
    if !has_oci {
        assert!(
            std::env::var("CI").is_err(),
            "CI FATAL: OCI test requires crun or runc — install in CI or remove test from CI job"
        );
        eprintln!("skipping OCI test: no crun or runc found");
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_content = r#"manifest_version = 1
[base]
image = "rolling"
[runtime]
backend = "oci"
"#;
    let manifest_path = write_manifest(project_dir.path(), manifest_content);
    let env_id = engine.build(&manifest_path).unwrap().identity.env_id;
    // A successful OCI build lands in the Built state.
    assert_eq!(engine.inspect(&env_id).unwrap().state, EnvState::Built);
    engine.destroy(&env_id).unwrap();
    assert!(
        !StoreLayout::new(store_dir.path()).env_path(&env_id).exists(),
        "OCI env dir must be cleaned up after destroy"
    );
}
/// Concurrent exec calls on the same environment must all succeed.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_namespace_concurrent_exec() {
    if !prereqs_available() {
        return;
    }
    let store = tempfile::tempdir().unwrap();
    let project = tempfile::tempdir().unwrap();
    // Engine and env_id are shared across worker threads via Arc.
    let engine = std::sync::Arc::new(Engine::new(store.path()));
    let manifest = write_manifest(project.path(), &namespace_manifest(&[]));
    let result = engine.build(&manifest).unwrap();
    let env_id = std::sync::Arc::new(result.identity.env_id.clone());
    // Spawn four threads that exec concurrently in the same environment.
    let handles: Vec<_> = (0..4)
        .map(|i| {
            let eng = std::sync::Arc::clone(&engine);
            let eid = std::sync::Arc::clone(&env_id);
            std::thread::spawn(move || {
                let cmd = vec!["echo".to_owned(), format!("thread-{i}")];
                eng.exec(&eid, &cmd).unwrap();
            })
        })
        .collect();
    for h in handles {
        h.join().expect("exec thread must not panic");
    }
    // No stale .running markers may remain once every exec has finished.
    let layout = StoreLayout::new(store.path());
    let env_path = layout.env_path(&env_id);
    if env_path.exists() {
        let running_markers: Vec<_> = fs::read_dir(&env_path)
            .unwrap()
            .filter_map(Result::ok)
            .filter(|e| e.file_name().to_string_lossy().ends_with(".running"))
            .collect();
        assert!(
            running_markers.is_empty(),
            "no stale .running markers after concurrent exec: {:?}",
            running_markers
                .iter()
                .map(fs::DirEntry::file_name)
                .collect::<Vec<_>>()
        );
    }
}
// --- IG-M2: Real Package Resolution Validation ---
/// Verify resolved packages have real versions (not mock/unresolved).
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_resolve_pins_exact_versions() {
    if !prereqs_available() {
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path = write_manifest(project_dir.path(), &namespace_manifest(&["curl"]));
    let built = engine.build(&manifest_path).unwrap();
    for pkg in &built.lock_file.resolved_packages {
        // An empty version means resolution never ran at all.
        assert!(
            !pkg.version.is_empty(),
            "package {} has empty version",
            pkg.name
        );
        assert_ne!(
            pkg.version, "0.0.0-mock",
            "package {} has mock version — real resolver not running",
            pkg.name
        );
        // Version should contain at least one digit
        assert!(
            pkg.version.chars().any(|c| c.is_ascii_digit()),
            "package {} version '{}' contains no digits — suspect",
            pkg.name,
            pkg.version
        );
    }
}
/// Rebuild same manifest must produce identical env_id and resolved versions.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_resolve_deterministic_across_rebuilds() {
    if !prereqs_available() {
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path = write_manifest(project_dir.path(), &namespace_manifest(&["curl"]));
    let first = engine.build(&manifest_path).unwrap();
    let second = engine.rebuild(&manifest_path).unwrap();
    // Both the identity and the full resolved package set must be stable.
    assert_eq!(
        first.identity.env_id, second.identity.env_id,
        "same manifest must produce same env_id"
    );
    assert_eq!(
        first.lock_file.resolved_packages, second.lock_file.resolved_packages,
        "resolved packages must be identical across rebuilds"
    );
}
/// Building with a non-existent package must fail cleanly.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_resolve_nonexistent_package_fails() {
    if !prereqs_available() {
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path = write_manifest(
        project_dir.path(),
        &namespace_manifest(&["nonexistent-pkg-that-does-not-exist-xyz"]),
    );
    assert!(
        engine.build(&manifest_path).is_err(),
        "build with non-existent package must fail"
    );
    // A failed build must not leave partially-constructed env directories.
    let env_dir = StoreLayout::new(store_dir.path()).env_dir();
    if env_dir.exists() {
        let leftovers: Vec<_> = fs::read_dir(&env_dir)
            .unwrap()
            .filter_map(Result::ok)
            .collect();
        assert!(
            leftovers.is_empty(),
            "no orphaned env dirs after failed build: {:?}",
            leftovers
                .iter()
                .map(fs::DirEntry::file_name)
                .collect::<Vec<_>>()
        );
    }
}
/// Build with multiple packages — all must have non-empty resolved versions.
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_resolve_multiple_packages() {
    if !prereqs_available() {
        return;
    }
    let store_dir = tempfile::tempdir().unwrap();
    let project_dir = tempfile::tempdir().unwrap();
    let engine = Engine::new(store_dir.path());
    let manifest_path =
        write_manifest(project_dir.path(), &namespace_manifest(&["curl", "git"]));
    let built = engine.build(&manifest_path).unwrap();
    let resolved = &built.lock_file.resolved_packages;
    assert!(
        resolved.len() >= 2,
        "at least 2 resolved packages expected, got {}",
        resolved.len()
    );
    // Every package must carry a concrete, resolved version string.
    for pkg in resolved {
        assert!(
            !pkg.version.is_empty() && pkg.version != "unresolved",
            "package {} has unresolved version",
            pkg.name
        );
    }
}
/// Build with packages (requires network to download image + install).
#[test]
#[ignore = "requires Linux user namespaces, fuse-overlayfs, curl, and network"]
fn e2e_build_with_packages() {
    if !prereqs_available() {
        return;
    }
    let store = tempfile::tempdir().unwrap();
    let project = tempfile::tempdir().unwrap();
    let engine = Engine::new(store.path());
    // Use a package whose zypper name matches its RPM name on openSUSE
    let manifest = write_manifest(project.path(), &namespace_manifest(&["curl"]));
    let result = engine.build(&manifest).unwrap();
    assert!(!result.identity.env_id.is_empty());
    // Lock file should have resolved packages with real versions
    let lock = result.lock_file;
    assert_eq!(lock.lock_version, 2);
    assert!(!lock.resolved_packages.is_empty());
    // At least one package should have a resolved version.
    // Note: some package names may not match their RPM names exactly,
    // causing fallback to "unresolved". This is a known limitation,
    // which is why this assertion is deliberately weaker than
    // e2e_resolve_multiple_packages (which requires ALL to resolve).
    let resolved_count = lock
        .resolved_packages
        .iter()
        .filter(|p| p.version != "unresolved")
        .count();
    assert!(
        resolved_count > 0,
        "at least one package should have a resolved version, got: {:?}",
        lock.resolved_packages
    );
}

View file

@ -0,0 +1,444 @@
//! IG-M4: True disk-full (ENOSPC) simulation tests.
//!
//! These tests mount a tiny tmpfs to trigger real ENOSPC conditions.
//! They require root (or equivalent) to mount tmpfs, so they are ignored
//! by default and run in CI with: `sudo -E cargo test --test enospc -- --ignored`
use std::path::{Path, PathBuf};
use std::process::Command;
/// Mount a tmpfs of the given size (in KB) at `path`.
/// Returns true if successful. Requires root.
fn mount_tiny_tmpfs(path: &Path, size_kb: u64) -> bool {
    // This helper reports failure via its return value, so it must not
    // panic when the mount point cannot be created (e.g. read-only or
    // missing parent) — return false like any other failure instead.
    if std::fs::create_dir_all(path).is_err() {
        return false;
    }
    let status = Command::new("mount")
        .args(["-t", "tmpfs", "-o", &format!("size={size_kb}k"), "tmpfs"])
        .arg(path)
        .status();
    // Success requires both that `mount` could be spawned and that it
    // exited with status 0.
    matches!(status, Ok(s) if s.success())
}
/// Unmount the tmpfs at `path`, ignoring failures (best-effort cleanup).
fn unmount(path: &Path) {
    let mut cmd = Command::new("umount");
    cmd.arg(path);
    let _ = cmd.status();
}
/// RAII guard that unmounts on drop.
struct TmpfsGuard {
    // Mount point of the tmpfs this guard owns.
    path: PathBuf,
}
impl TmpfsGuard {
    /// Mount a tmpfs of `size_kb` kilobytes at `path`; returns `None`
    /// when mounting fails (e.g. not running as root).
    fn mount(path: &Path, size_kb: u64) -> Option<Self> {
        mount_tiny_tmpfs(path, size_kb).then(|| Self {
            path: path.to_path_buf(),
        })
    }
}
impl Drop for TmpfsGuard {
    fn drop(&mut self) {
        // Best-effort: the tmpfs must not outlive the test.
        unmount(&self.path);
    }
}
/// Filling a 64KB tmpfs with objects must eventually surface
/// StoreError::Io (ENOSPC) from put(), never a panic.
#[test]
#[ignore = "requires root for tmpfs mount"]
fn enospc_object_put_returns_io_error() {
    let base = tempfile::tempdir().unwrap();
    let mount_point = base.path().join("tiny");
    let _guard = TmpfsGuard::mount(&mount_point, 64)
        .expect("failed to mount tmpfs — are you running as root?");
    let layout = karapace_store::StoreLayout::new(&mount_point);
    layout.initialize().unwrap();
    let obj_store = karapace_store::ObjectStore::new(layout);
    // Write objects until we hit ENOSPC on the 64KB tmpfs.
    let mut hit_error = false;
    for i in 0..10_000 {
        let data = format!("object-data-{i}-padding-to-fill-disk-quickly").repeat(10);
        match obj_store.put(data.as_bytes()) {
            Ok(_) => {}
            Err(e) => {
                let msg = format!("{e}");
                eprintln!("ENOSPC triggered at object {i}: {msg}");
                hit_error = true;
                // Must be an Io error, never a panic
                assert!(
                    matches!(e, karapace_store::StoreError::Io(_)),
                    "expected StoreError::Io, got: {e}"
                );
                break;
            }
        }
    }
    // If 10k writes succeeded the tmpfs never filled — test setup is wrong.
    assert!(
        hit_error,
        "should have hit ENOSPC within 10000 objects on 64KB tmpfs"
    );
}
/// A build on a 64KB tmpfs must fail with an error (not panic) and must
/// leave the WAL clean and the env dir free of partial artifacts.
#[test]
#[ignore = "requires root for tmpfs mount"]
fn enospc_build_fails_cleanly() {
    use karapace_core::Engine;
    use karapace_store::StoreLayout;
    let base = tempfile::tempdir().unwrap();
    let mount_point = base.path().join("tiny");
    // 64KB cannot hold a five-package build, so ENOSPC is guaranteed
    // partway through.
    let _guard = TmpfsGuard::mount(&mount_point, 64)
        .expect("failed to mount tmpfs — are you running as root?");
    let layout = StoreLayout::new(&mount_point);
    layout.initialize().unwrap();
    let manifest = r#"
manifest_version = 1
[base]
image = "rolling"
[system]
packages = ["curl", "git", "vim", "wget", "htop"]
"#;
    let manifest_path = mount_point.join("karapace.toml");
    std::fs::write(&manifest_path, manifest).unwrap();
    let engine = Engine::new(&mount_point);
    let result = engine.build(&manifest_path);
    // Build must fail (ENOSPC), not panic
    assert!(result.is_err(), "build on 64KB tmpfs must fail");
    // WAL must have no incomplete entries after error cleanup
    let wal = karapace_store::WriteAheadLog::new(&layout);
    let incomplete = wal.list_incomplete().unwrap_or_default();
    assert!(
        incomplete.is_empty(),
        "WAL must be clean after failed build, found {} incomplete entries",
        incomplete.len()
    );
    // No orphaned env directories may survive the failed build
    let env_dir = layout.env_dir();
    if env_dir.exists() {
        let entries: Vec<_> = std::fs::read_dir(&env_dir)
            .unwrap()
            .filter_map(Result::ok)
            .collect();
        assert!(
            entries.is_empty(),
            "no orphaned env dirs after failed build, found: {:?}",
            entries
                .iter()
                .map(std::fs::DirEntry::file_name)
                .collect::<Vec<_>>()
        );
    }
}
/// A WAL begin() on an exhausted 4KB tmpfs must return an error rather
/// than panic.
#[test]
#[ignore = "requires root for tmpfs mount"]
fn enospc_wal_write_fails() {
    let base = tempfile::tempdir().unwrap();
    let mount_point = base.path().join("tiny");
    let _guard = TmpfsGuard::mount(&mount_point, 4)
        .expect("failed to mount tmpfs — are you running as root?");
    // Create minimal store structure
    std::fs::create_dir_all(mount_point.join("store").join("wal")).unwrap();
    // Exhaust the tmpfs with filler files until writes start failing.
    for i in 0..100 {
        if std::fs::write(mount_point.join(format!("filler_{i}")), [0u8; 512]).is_err() {
            break;
        }
    }
    let layout = karapace_store::StoreLayout::new(&mount_point);
    let wal = karapace_store::WriteAheadLog::new(&layout);
    // WAL begin should fail due to ENOSPC
    assert!(
        wal.begin(karapace_store::WalOpKind::Build, "test_env").is_err(),
        "WAL begin on full disk must fail, not panic"
    );
}
/// A commit that hits ENOSPC mid-way must fail with an error, leave the
/// environment in the Built state, and leave no partial staging artifacts.
#[test]
#[ignore = "requires root for tmpfs mount"]
fn enospc_commit_fails_cleanly() {
    use karapace_core::Engine;
    use karapace_store::StoreLayout;
    let base = tempfile::tempdir().unwrap();
    let mount_point = base.path().join("medium");
    // 256KB — enough for build, but commit with large upper should fail
    let _guard = TmpfsGuard::mount(&mount_point, 256)
        .expect("failed to mount tmpfs — are you running as root?");
    let layout = StoreLayout::new(&mount_point);
    layout.initialize().unwrap();
    let manifest = r#"
manifest_version = 1
[base]
image = "rolling"
"#;
    let manifest_path = mount_point.join("karapace.toml");
    std::fs::write(&manifest_path, manifest).unwrap();
    let engine = Engine::new(&mount_point);
    // Build must succeed on 256KB — if it doesn't, the test setup is wrong
    let build_result = engine.build(&manifest_path);
    assert!(
        build_result.is_ok(),
        "build on 256KB tmpfs must succeed for commit test to be valid: {:?}",
        build_result.err()
    );
    let env_id = build_result.unwrap().identity.env_id;
    // Write enough data to the upper dir to fill the disk
    let upper = layout.upper_dir(&env_id);
    std::fs::create_dir_all(&upper).unwrap();
    let mut filled = false;
    for i in 0..500 {
        let path = upper.join(format!("bigfile_{i}"));
        if std::fs::write(&path, [0xAB; 1024]).is_err() {
            filled = true;
            break;
        }
    }
    // 500 x 1KB writes exceed 256KB, so one of them must have failed.
    assert!(
        filled,
        "must fill disk before commit — 256KB tmpfs should be exhaustible"
    );
    // Commit MUST fail due to ENOSPC during layer packing
    let commit_result = engine.commit(&env_id);
    assert!(
        commit_result.is_err(),
        "commit on full disk MUST fail — test is invalid if it succeeds"
    );
    // Verify env state is still Built (not corrupted)
    let meta = karapace_store::MetadataStore::new(layout.clone())
        .get(&env_id)
        .unwrap();
    assert_eq!(
        meta.state,
        karapace_store::EnvState::Built,
        "env state must remain Built after failed commit"
    );
    // No partial commit artifacts may remain in the staging area
    let layers_dir = layout.layers_dir();
    if layers_dir.exists() {
        let staging = layout.staging_dir();
        if staging.exists() {
            let staging_entries: Vec<_> = std::fs::read_dir(&staging)
                .unwrap()
                .filter_map(Result::ok)
                .collect();
            assert!(
                staging_entries.is_empty(),
                "no partial staging artifacts after failed commit: {:?}",
                staging_entries
                    .iter()
                    .map(std::fs::DirEntry::file_name)
                    .collect::<Vec<_>>()
            );
        }
    }
}
/// After ENOSPC, freeing space must allow the object store to accept
/// writes again — no sticky error state.
#[test]
#[ignore = "requires root for tmpfs mount"]
fn enospc_recovery_after_freeing_space() {
    use karapace_store::{ObjectStore, StoreLayout};
    let base = tempfile::tempdir().unwrap();
    let mount_point = base.path().join("recov");
    let _guard = TmpfsGuard::mount(&mount_point, 128)
        .expect("failed to mount tmpfs — are you running as root?");
    let layout = StoreLayout::new(&mount_point);
    layout.initialize().unwrap();
    let obj_store = ObjectStore::new(layout);
    // Fill with objects until a put fails, keeping the stored hashes.
    let mut hashes = Vec::new();
    for i in 0..500 {
        let data = format!("fill-data-{i}").repeat(5);
        match obj_store.put(data.as_bytes()) {
            Ok(h) => hashes.push(h),
            Err(_) => break,
        }
    }
    assert!(!hashes.is_empty(), "should have stored at least one object");
    // Attempt one more write — MUST fail (disk full)
    let big_data = [0xCD; 4096];
    let err_result = obj_store.put(&big_data);
    assert!(
        err_result.is_err(),
        "128KB tmpfs must be full after filling — test setup invalid if write succeeds"
    );
    // Delete half the objects to free space.
    // NOTE(review): assumes a flat objects/ directory keyed by hash —
    // confirm against ObjectStore's on-disk layout.
    let objects_dir = mount_point.join("store").join("objects");
    let half = hashes.len() / 2;
    for h in &hashes[..half] {
        let _ = std::fs::remove_file(objects_dir.join(h));
    }
    // Now writes should succeed again
    let recovery_result = obj_store.put(b"recovery data after freeing space");
    assert!(
        recovery_result.is_ok(),
        "write must succeed after freeing space: {:?}",
        recovery_result.err()
    );
}
/// A layer manifest write on an exhausted tmpfs must surface
/// StoreError::Io, never succeed or panic.
#[test]
#[ignore = "requires root for tmpfs mount"]
fn enospc_layer_put_fails_cleanly() {
    use karapace_store::{LayerKind, LayerManifest, LayerStore, StoreLayout};
    let base = tempfile::tempdir().unwrap();
    let mount_point = base.path().join("tiny_layer");
    let _guard = TmpfsGuard::mount(&mount_point, 8)
        .expect("failed to mount tmpfs — are you running as root?");
    let layout = StoreLayout::new(&mount_point);
    layout.initialize().unwrap();
    // Exhaust the 8KB tmpfs with filler files.
    for i in 0..200 {
        if std::fs::write(mount_point.join(format!("filler_{i}")), [0u8; 256]).is_err() {
            break;
        }
    }
    let layer_store = LayerStore::new(layout.clone());
    let manifest = LayerManifest {
        hash: "test_layer_enospc".to_owned(),
        kind: LayerKind::Base,
        parent: None,
        object_refs: vec!["obj1".to_owned(), "obj2".to_owned()],
        read_only: true,
        tar_hash: String::new(),
    };
    // The write must fail, and specifically with an I/O error.
    match layer_store.put(&manifest) {
        Ok(_) => panic!("layer put on full disk MUST fail, not succeed"),
        Err(karapace_store::StoreError::Io(_)) => {}
        Err(other) => panic!("expected StoreError::Io, got: {other:?}"),
    }
}
/// A metadata write on an exhausted tmpfs must surface StoreError::Io,
/// never succeed or panic.
#[test]
#[ignore = "requires root for tmpfs mount"]
fn enospc_metadata_put_fails_cleanly() {
    use karapace_store::{EnvMetadata, EnvState, MetadataStore, StoreLayout};
    let base = tempfile::tempdir().unwrap();
    let mount_point = base.path().join("tiny_meta");
    let _guard = TmpfsGuard::mount(&mount_point, 8)
        .expect("failed to mount tmpfs — are you running as root?");
    let layout = StoreLayout::new(&mount_point);
    layout.initialize().unwrap();
    // Exhaust the 8KB tmpfs with filler files.
    for i in 0..200 {
        if std::fs::write(mount_point.join(format!("filler_{i}")), [0u8; 256]).is_err() {
            break;
        }
    }
    let meta = EnvMetadata {
        env_id: "enospc_test_env".into(),
        short_id: "enospc_test".into(),
        name: Some("enospc-test".to_owned()),
        state: EnvState::Built,
        base_layer: "fake_layer".into(),
        dependency_layers: vec![],
        policy_layer: None,
        manifest_hash: "fake_hash".into(),
        ref_count: 1,
        created_at: "2025-01-01T00:00:00Z".to_owned(),
        updated_at: "2025-01-01T00:00:00Z".to_owned(),
        checksum: None,
    };
    // The write must fail, and specifically with an I/O error.
    match MetadataStore::new(layout).put(&meta) {
        Ok(_) => panic!("metadata put on full disk MUST fail, not succeed"),
        Err(karapace_store::StoreError::Io(_)) => {}
        Err(other) => panic!("expected StoreError::Io, got: {other:?}"),
    }
}
/// StoreLayout::initialize must fail when the version-file write hits
/// a full disk.
#[test]
#[ignore = "requires root for tmpfs mount"]
fn enospc_version_file_write_fails() {
    use karapace_store::StoreLayout;
    let base = tempfile::tempdir().unwrap();
    let mount_point = base.path().join("tiny_ver");
    // Very small: just enough for dirs but not for version file after fill
    let _guard = TmpfsGuard::mount(&mount_point, 4)
        .expect("failed to mount tmpfs — are you running as root?");
    // Manually create the directory skeleton; initialize() itself writes
    // the version file, and that is the write we want to see fail.
    let store_dir = mount_point.join("store");
    for sub in ["objects", "layers", "metadata", "staging"] {
        std::fs::create_dir_all(store_dir.join(sub)).unwrap();
    }
    std::fs::create_dir_all(mount_point.join("env")).unwrap();
    // Fill the tmpfs completely
    for i in 0..200 {
        if std::fs::write(mount_point.join(format!("filler_{i}")), [0u8; 256]).is_err() {
            break;
        }
    }
    // Now try to initialize (which writes the version file) — must fail
    assert!(
        StoreLayout::new(&mount_point).initialize().is_err(),
        "StoreLayout::initialize on full disk MUST fail when writing version file"
    );
}

File diff suppressed because it is too large Load diff