Skip to content

Commit

Permalink
[release-1.8] 🐛 KCP: remove etcd member in pre-terminate hook (#11136)
Browse files Browse the repository at this point in the history
* KCP: remove etcd member in pre-terminate hook

* fixup
  • Loading branch information
sbueringer authored Sep 5, 2024
1 parent 0885e48 commit b772261
Show file tree
Hide file tree
Showing 16 changed files with 684 additions and 102 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ const (
// failures in updating remediation retry (the counter restarts from zero).
RemediationForAnnotation = "controlplane.cluster.x-k8s.io/remediation-for"

// PreTerminateHookCleanupAnnotation is the annotation KCP sets on Machines to ensure it can later remove the
// etcd member right before Machine termination (i.e. before InfraMachine deletion).
// Note: Starting with Kubernetes v1.31 this hook will wait for all other pre-terminate hooks to finish to
// ensure it runs last (thus ensuring that kubelet is still working while other pre-terminate hooks run).
PreTerminateHookCleanupAnnotation = clusterv1.PreTerminateDeleteHookAnnotationPrefix + "/kcp-cleanup"

// DefaultMinHealthyPeriod defines the default minimum period before we consider a remediation on a
// machine unrelated from the previous remediation.
DefaultMinHealthyPeriod = 1 * time.Hour
Expand Down
5 changes: 5 additions & 0 deletions controlplane/kubeadm/internal/control_plane.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ func (c *ControlPlane) HasDeletingMachine() bool {
return len(c.Machines.Filter(collections.HasDeletionTimestamp)) > 0
}

// DeletingMachines returns machines in the control plane that are in the process of being deleted.
func (c *ControlPlane) DeletingMachines() collections.Machines {
return c.Machines.Filter(collections.HasDeletionTimestamp)
}

// GetKubeadmConfig returns the KubeadmConfig of a given machine.
func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.KubeadmConfig, bool) {
kubeadmConfig, ok := c.KubeadmConfigs[machineName]
Expand Down
127 changes: 122 additions & 5 deletions controlplane/kubeadm/internal/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,10 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl
return ctrl.Result{}, err
}

if result, err := r.reconcilePreTerminateHook(ctx, controlPlane); err != nil || !result.IsZero() {
return result, err
}

// Reconcile unhealthy machines by triggering deletion and requeue if it is considered safe to remediate,
// otherwise continue with the other KCP operations.
if result, err := r.reconcileUnhealthyMachines(ctx, controlPlane); err != nil || !result.IsZero() {
Expand Down Expand Up @@ -563,12 +567,24 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
// Delete control plane machines in parallel
machinesToDelete := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
var errs []error
for i := range machinesToDelete {
m := machinesToDelete[i]
logger := log.WithValues("Machine", klog.KObj(m))
if err := r.Client.Delete(ctx, machinesToDelete[i]); err != nil && !apierrors.IsNotFound(err) {
logger.Error(err, "Failed to cleanup owned machine")
for _, machineToDelete := range machinesToDelete {
log := log.WithValues("Machine", klog.KObj(machineToDelete))
ctx := ctrl.LoggerInto(ctx, log)

// During KCP deletion we don't care about forwarding etcd leadership or removing etcd members.
// So we are removing the pre-terminate hook.
// This is important because when deleting KCP we will delete all members of etcd and it's not possible
// to forward etcd leadership without any member left after we went through the Machine deletion.
// Also in this case the reconcileDelete code of the Machine controller won't execute Node drain
// and wait for volume detach.
if err := r.removePreTerminateHookAnnotationFromMachine(ctx, machineToDelete); err != nil {
errs = append(errs, err)
continue
}

log.Info("Deleting control plane Machine")
if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
errs = append(errs, errors.Wrapf(err, "failed to delete control plane Machine %s", klog.KObj(machineToDelete)))
}
}
if len(errs) > 0 {
Expand All @@ -581,6 +597,18 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

func (r *KubeadmControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error {
log := ctrl.LoggerFrom(ctx)
log.Info("Removing pre-terminate hook from control plane Machine")

machineOriginal := machine.DeepCopy()
delete(machine.Annotations, controlplanev1.PreTerminateHookCleanupAnnotation)
if err := r.Client.Patch(ctx, machine, client.MergeFrom(machineOriginal)); err != nil {
return errors.Wrapf(err, "failed to remove pre-terminate hook from control plane Machine %s", klog.KObj(machine))
}
return nil
}

// ClusterToKubeadmControlPlane is a handler.ToRequestsFunc to be used to enqueue requests for reconciliation
// for KubeadmControlPlane based on updates to a Cluster.
func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(_ context.Context, o client.Object) []ctrl.Request {
Expand Down Expand Up @@ -768,6 +796,95 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
return nil
}

func (r *KubeadmControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
if !controlPlane.HasDeletingMachine() {
return ctrl.Result{}, nil
}

log := ctrl.LoggerFrom(ctx)

// Return early, if there is already a deleting Machine without the pre-terminate hook.
// We are going to wait until this Machine goes away before running the pre-terminate hook on other Machines.
for _, deletingMachine := range controlPlane.DeletingMachines() {
if _, exists := deletingMachine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists {
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}
}

// Pick the Machine with the oldest deletionTimestamp to keep this function deterministic / reentrant
// so we only remove the pre-terminate hook from one Machine at a time.
deletingMachine := controlPlane.DeletingMachines().OldestDeletionTimestamp()
log = log.WithValues("Machine", klog.KObj(deletingMachine))
ctx = ctrl.LoggerInto(ctx, log)

parsedVersion, err := semver.ParseTolerant(controlPlane.KCP.Spec.Version)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to parse Kubernetes version %q", controlPlane.KCP.Spec.Version)
}

// Return early if there are other pre-terminate hooks for the Machine.
// The KCP pre-terminate hook should be the one executed last, so that kubelet
// is still working while other pre-terminate hooks are run.
// Note: This is done only for Kubernetes >= v1.31 to reduce the blast radius of this check.
if version.Compare(parsedVersion, semver.MustParse("1.31.0"), version.WithoutPreReleases()) >= 0 {
if machineHasOtherPreTerminateHooks(deletingMachine) {
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}
}

// Return early because the Machine controller is not yet waiting for the pre-terminate hook.
c := conditions.Get(deletingMachine, clusterv1.PreTerminateDeleteHookSucceededCondition)
if c == nil || c.Status != corev1.ConditionFalse || c.Reason != clusterv1.WaitingExternalHookReason {
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

// The following will execute and remove the pre-terminate hook from the Machine.

// If we have more than 1 Machine and etcd is managed we forward etcd leadership and remove the member
// to keep the etcd cluster healthy.
if controlPlane.Machines.Len() > 1 && controlPlane.IsEtcdManaged() {
workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s: failed to create client to workload cluster", klog.KObj(deletingMachine))
}

// Note: In regular deletion cases (remediation, scale down) the leader should have been already moved.
// We're doing this again here in case the Machine became leader again or the Machine deletion was
// triggered in another way (e.g. a user running kubectl delete machine)
etcdLeaderCandidate := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)).Newest()
if etcdLeaderCandidate != nil {
if err := workloadCluster.ForwardEtcdLeadership(ctx, deletingMachine, etcdLeaderCandidate); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to move leadership to candidate Machine %s", etcdLeaderCandidate.Name)
}
} else {
log.Info("Skip forwarding etcd leadership, because there is no other control plane Machine without a deletionTimestamp")
}

// Note: Removing the etcd member will lead to the etcd and the kube-apiserver Pod on the Machine shutting down.
// If ControlPlaneKubeletLocalMode is used, the kubelet is communicating with the local apiserver and thus now
// won't be able to see any updates to e.g. Pods anymore.
if err := workloadCluster.RemoveEtcdMemberForMachine(ctx, deletingMachine); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s", klog.KObj(deletingMachine))
}
}

if err := r.removePreTerminateHookAnnotationFromMachine(ctx, deletingMachine); err != nil {
return ctrl.Result{}, err
}

log.Info("Waiting for Machines to be deleted", "machines", strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", "))
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

func machineHasOtherPreTerminateHooks(machine *clusterv1.Machine) bool {
for k := range machine.Annotations {
if strings.HasPrefix(k, clusterv1.PreTerminateDeleteHookAnnotationPrefix) && k != controlplanev1.PreTerminateHookCleanupAnnotation {
return true
}
}
return false
}

func (r *KubeadmControlPlaneReconciler) reconcileCertificateExpiries(ctx context.Context, controlPlane *internal.ControlPlane) error {
log := ctrl.LoggerFrom(ctx)

Expand Down
Loading

0 comments on commit b772261

Please sign in to comment.