mirror of
https://github.com/marcoallegretti/karapace.git
synced 2026-03-27 05:53:10 +00:00
fix(core): make stop robust to stale state
- Stop can now succeed when metadata is stale but runtime indicates running\n- Only uses .running PID fallback for namespace/mock; avoids OCI PID misuse\n- Preserves non-Running states (e.g. Frozen) after stopping\n- Adds integration tests for stale metadata and Frozen stop
This commit is contained in:
parent
598d07239e
commit
d78a770502
2 changed files with 142 additions and 7 deletions
|
|
@ -498,7 +498,58 @@ impl Engine {
|
||||||
.get(env_id)
|
.get(env_id)
|
||||||
.map_err(|_| CoreError::EnvNotFound(env_id.to_owned()))?;
|
.map_err(|_| CoreError::EnvNotFound(env_id.to_owned()))?;
|
||||||
|
|
||||||
if meta.state != EnvState::Running {
|
let target_state = if meta.state == EnvState::Running {
|
||||||
|
EnvState::Built
|
||||||
|
} else {
|
||||||
|
meta.state
|
||||||
|
};
|
||||||
|
|
||||||
|
let normalized = self.load_manifest(&meta.manifest_hash)?;
|
||||||
|
let store_str = self.store_root_str.clone();
|
||||||
|
let backend = select_backend(&normalized.runtime_backend, &store_str)?;
|
||||||
|
|
||||||
|
let running_file = self.layout.env_path(env_id).join(".running");
|
||||||
|
let has_marker = running_file.exists();
|
||||||
|
let allow_marker_pid_fallback =
|
||||||
|
normalized.runtime_backend == "namespace" || normalized.runtime_backend == "mock";
|
||||||
|
|
||||||
|
let marker_pid = if has_marker && allow_marker_pid_fallback {
|
||||||
|
match std::fs::read_to_string(&running_file) {
|
||||||
|
Ok(s) => {
|
||||||
|
let pid_str = s.trim();
|
||||||
|
let pid = pid_str.parse::<u32>().ok();
|
||||||
|
if let Some(p) = pid {
|
||||||
|
if Path::new(&format!("/proc/{p}")).exists() {
|
||||||
|
Some(p)
|
||||||
|
} else {
|
||||||
|
let _ = std::fs::remove_file(&running_file);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !pid_str.is_empty() {
|
||||||
|
let _ = std::fs::remove_file(&running_file);
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => None,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let status = backend.status(env_id);
|
||||||
|
let (runtime_running, runtime_pid) = match status {
|
||||||
|
Ok(s) => (s.running, s.pid),
|
||||||
|
Err(e) => {
|
||||||
|
if meta.state == EnvState::Running || has_marker {
|
||||||
|
return Err(e.into());
|
||||||
|
}
|
||||||
|
(false, None)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if meta.state != EnvState::Running && !runtime_running && marker_pid.is_none() {
|
||||||
return Err(CoreError::Runtime(
|
return Err(CoreError::Runtime(
|
||||||
karapace_runtime::RuntimeError::NotRunning(format!(
|
karapace_runtime::RuntimeError::NotRunning(format!(
|
||||||
"{} (state: {})",
|
"{} (state: {})",
|
||||||
|
|
@ -507,12 +558,9 @@ impl Engine {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let normalized = self.load_manifest(&meta.manifest_hash)?;
|
let pid_to_kill = runtime_pid.or(marker_pid);
|
||||||
let store_str = self.store_root_str.clone();
|
|
||||||
let backend = select_backend(&normalized.runtime_backend, &store_str)?;
|
|
||||||
let status = backend.status(env_id)?;
|
|
||||||
|
|
||||||
if let Some(pid) = status.pid {
|
if let Some(pid) = pid_to_kill {
|
||||||
let pid_i32 = i32::try_from(pid).map_err(|_| {
|
let pid_i32 = i32::try_from(pid).map_err(|_| {
|
||||||
CoreError::Runtime(karapace_runtime::RuntimeError::ExecFailed(format!(
|
CoreError::Runtime(karapace_runtime::RuntimeError::ExecFailed(format!(
|
||||||
"invalid pid {pid}: exceeds i32 range"
|
"invalid pid {pid}: exceeds i32 range"
|
||||||
|
|
@ -556,7 +604,7 @@ impl Engine {
|
||||||
let running_file = self.layout.env_path(env_id).join(".running");
|
let running_file = self.layout.env_path(env_id).join(".running");
|
||||||
let _ = std::fs::remove_file(running_file);
|
let _ = std::fs::remove_file(running_file);
|
||||||
|
|
||||||
self.meta_store.update_state(env_id, EnvState::Built)?;
|
self.meta_store.update_state(env_id, target_state)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1578,6 +1578,93 @@ fn stop_non_running_env_returns_error() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stop_succeeds_when_metadata_stale_but_running_marker_exists() {
|
||||||
|
let store = tempfile::tempdir().unwrap();
|
||||||
|
let project = tempfile::tempdir().unwrap();
|
||||||
|
let engine = Engine::new(store.path());
|
||||||
|
|
||||||
|
let manifest = write_manifest(project.path(), &mock_manifest(&["git"]));
|
||||||
|
let r = engine.build(&manifest).unwrap();
|
||||||
|
let env_id = r.identity.env_id.to_string();
|
||||||
|
|
||||||
|
// Spawn a real sleep process and wait on it later to avoid zombie
|
||||||
|
let mut child = std::process::Command::new("sleep")
|
||||||
|
.arg("60")
|
||||||
|
.spawn()
|
||||||
|
.expect("spawn sleep");
|
||||||
|
let pid = child.id();
|
||||||
|
let pid_i32 = i32::try_from(pid).expect("pid fits in i32");
|
||||||
|
|
||||||
|
// Leave metadata state as Built but write .running marker with real PID.
|
||||||
|
let env_dir = store.path().join("env").join(&env_id);
|
||||||
|
fs::create_dir_all(&env_dir).unwrap();
|
||||||
|
fs::write(env_dir.join(".running"), pid.to_string()).unwrap();
|
||||||
|
|
||||||
|
let result = engine.stop(&env_id);
|
||||||
|
|
||||||
|
// Clean up the real process regardless of result
|
||||||
|
unsafe {
|
||||||
|
libc::kill(pid_i32, libc::SIGKILL);
|
||||||
|
}
|
||||||
|
let _ = child.wait();
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
result.is_ok(),
|
||||||
|
"stop must succeed when .running exists even if metadata is stale: {:?}",
|
||||||
|
result.err()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify state remains Built
|
||||||
|
let meta = engine.inspect(&env_id).unwrap();
|
||||||
|
assert_eq!(meta.state, EnvState::Built);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stop_preserves_frozen_state() {
|
||||||
|
let store = tempfile::tempdir().unwrap();
|
||||||
|
let project = tempfile::tempdir().unwrap();
|
||||||
|
let engine = Engine::new(store.path());
|
||||||
|
|
||||||
|
let manifest = write_manifest(project.path(), &mock_manifest(&["git"]));
|
||||||
|
let r = engine.build(&manifest).unwrap();
|
||||||
|
let env_id = r.identity.env_id.to_string();
|
||||||
|
|
||||||
|
// Spawn a real sleep process and wait on it later to avoid zombie
|
||||||
|
let mut child = std::process::Command::new("sleep")
|
||||||
|
.arg("60")
|
||||||
|
.spawn()
|
||||||
|
.expect("spawn sleep");
|
||||||
|
let pid = child.id();
|
||||||
|
let pid_i32 = i32::try_from(pid).expect("pid fits in i32");
|
||||||
|
|
||||||
|
// Set metadata to Frozen and write .running marker with real PID.
|
||||||
|
let layout = StoreLayout::new(store.path());
|
||||||
|
let meta_store = karapace_store::MetadataStore::new(layout.clone());
|
||||||
|
meta_store.update_state(&env_id, EnvState::Frozen).unwrap();
|
||||||
|
|
||||||
|
let env_dir = store.path().join("env").join(&env_id);
|
||||||
|
fs::create_dir_all(&env_dir).unwrap();
|
||||||
|
fs::write(env_dir.join(".running"), pid.to_string()).unwrap();
|
||||||
|
|
||||||
|
let result = engine.stop(&env_id);
|
||||||
|
|
||||||
|
// Clean up the real process regardless of result
|
||||||
|
unsafe {
|
||||||
|
libc::kill(pid_i32, libc::SIGKILL);
|
||||||
|
}
|
||||||
|
let _ = child.wait();
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
result.is_ok(),
|
||||||
|
"stop must succeed even if env is frozen but still running: {:?}",
|
||||||
|
result.err()
|
||||||
|
);
|
||||||
|
|
||||||
|
let meta = engine.inspect(&env_id).unwrap();
|
||||||
|
assert_eq!(meta.state, EnvState::Frozen);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn stale_running_marker_cleaned_on_engine_new() {
|
fn stale_running_marker_cleaned_on_engine_new() {
|
||||||
let store = tempfile::tempdir().unwrap();
|
let store = tempfile::tempdir().unwrap();
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue