-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(bors): merge pull request #862
862: Don't destroy replicas part of shutdown nexuses r=tiagolobocastro a=tiagolobocastro feat: re-shutdown nexus when node is online When nexus shutdown fails, if the node comes back online let's attempt the shutdown again. Signed-off-by: Tiago Castro <[email protected]> --- fix: don't disown replica from unshutdown nexus In case nexus shutdown failed we used to disown the replica from both the volume and the nexus. This can be a problem if the nexus is still running as the io-engine does not handle it gracefully, leading into pool lock issues. Instead, simply disown the replica from the volume and not the nexus. Signed-off-by: Tiago Castro <[email protected]> Co-authored-by: Tiago Castro <[email protected]>
- Loading branch information
Showing
7 changed files
with
332 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
control-plane/agents/src/bin/core/controller/reconciler/nexus/re_shutdown.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
use crate::controller::{ | ||
reconciler::PollTriggerEvent, | ||
resources::operations_helper::OperationSequenceGuard, | ||
task_poller::{PollContext, PollEvent, PollResult, PollerState, TaskPoller}, | ||
}; | ||
|
||
/// ReShutdown nexuses if node comes back online with the Nexus intact. | ||
#[derive(Debug)] | ||
pub(super) struct ReShutdown {} | ||
impl ReShutdown { | ||
/// Return a new `Self`. | ||
pub(super) fn new() -> Self { | ||
Self {} | ||
} | ||
} | ||
|
||
#[async_trait::async_trait] | ||
impl TaskPoller for ReShutdown { | ||
async fn poll(&mut self, context: &PollContext) -> PollResult { | ||
// Fetch all nexuses that are not properly shutdown | ||
for nexus in context.registry().specs().failed_shutdown_nexuses().await { | ||
let Some(volume_id) = &nexus.immutable_ref().owner else { | ||
continue; | ||
}; | ||
let Ok(_volume) = context.specs().volume(volume_id).await else { | ||
continue; | ||
}; | ||
|
||
let Ok(mut nexus) = nexus.operation_guard() else { | ||
continue; | ||
}; | ||
|
||
nexus.re_shutdown_nexus(context.registry()).await; | ||
} | ||
PollResult::Ok(PollerState::Idle) | ||
} | ||
|
||
async fn poll_timer(&mut self, _context: &PollContext) -> bool { | ||
false | ||
} | ||
|
||
async fn poll_event(&mut self, context: &PollContext) -> bool { | ||
matches!( | ||
context.event(), | ||
PollEvent::Triggered(PollTriggerEvent::Start) | ||
| PollEvent::Triggered(PollTriggerEvent::NodeStateChangeOnline) | ||
| PollEvent::Triggered(PollTriggerEvent::NodeDrain) | ||
) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.