Use node annotation for previous state
Without this, we use the daemonset lock to record the previous state
of the node. This is unnecessary, as we already have rights on the node.

This is a problem, as it makes the whole lock model more complex,
and prevents other implementations of the lock.

This fixes it by adapting the drain/uncordon functions to rely
on node annotations, introducing a new annotation named
"kured.dev/node-unschedulable-before-drain".

Signed-off-by: Jean-Philippe Evrard <[email protected]>
evrardjp committed Nov 11, 2024
1 parent a42e54d commit 1aee6b2
Showing 3 changed files with 183 additions and 162 deletions.
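
The core of the change is a small state round-trip: drain() records the node's pre-drain schedulability in the new annotation, and uncordon() only makes the node schedulable again if that recorded state says it was schedulable before kured touched it. The following is a minimal, self-contained sketch of that pattern. It is illustrative only: the node struct and the saveStateBeforeDrain/restoreStateAfterReboot helpers are hypothetical stand-ins, not kured's actual API; the real logic lives in drain() and uncordon() in the diff below.

package main

import (
	"fmt"
	"strconv"
)

// Annotation key introduced by this commit.
const wasUnschedulableAnnotation = "kured.dev/node-unschedulable-before-drain"

// node is a stand-in for the relevant parts of a v1.Node (hypothetical, for illustration only).
type node struct {
	Annotations   map[string]string
	Unschedulable bool
}

// saveStateBeforeDrain mirrors the logic added to drain(): record the pre-drain
// schedulability once, and never overwrite it on retries, then cordon.
func saveStateBeforeDrain(n *node) {
	if _, ok := n.Annotations[wasUnschedulableAnnotation]; !ok {
		n.Annotations[wasUnschedulableAnnotation] = strconv.FormatBool(n.Unschedulable)
	}
	n.Unschedulable = true // cordon
}

// restoreStateAfterReboot mirrors uncordon(): only make the node schedulable again
// if it was schedulable before kured touched it; always drop the annotation.
func restoreStateAfterReboot(n *node) error {
	v, ok := n.Annotations[wasUnschedulableAnnotation]
	if !ok {
		return nil // unknown previous state: do nothing
	}
	wasUnschedulable, err := strconv.ParseBool(v)
	if err != nil {
		return fmt.Errorf("cannot parse %s: %v", wasUnschedulableAnnotation, err)
	}
	delete(n.Annotations, wasUnschedulableAnnotation)
	if !wasUnschedulable {
		n.Unschedulable = false // uncordon
	}
	return nil
}

func main() {
	// Node already cordoned by an administrator before kured runs.
	n := &node{Annotations: map[string]string{}, Unschedulable: true}
	saveStateBeforeDrain(n)
	_ = restoreStateAfterReboot(n)
	fmt.Println(n.Unschedulable) // true: the pre-existing cordon is preserved
}

The design choice this illustrates is that the node itself, rather than the daemonset lock, carries the "was unschedulable before drain" fact, so any lock implementation can be used without having to store node metadata.
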
155 changes: 88 additions & 67 deletions cmd/kured/main.go
@@ -39,7 +39,7 @@ var (
alertFilter regexpValue
alertFilterMatchOnly bool
alertFiringOnly bool
annotateNodes bool
annotateNodeProgress bool
concurrency int
drainDelay time.Duration
drainGracePeriod int
@@ -92,7 +92,8 @@ var (

const (
// KuredNodeLockAnnotation is the canonical string value for the kured node-lock annotation
KuredNodeLockAnnotation string = "kured.dev/kured-node-lock"
KuredNodeLockAnnotation string = "kured.dev/kured-node-lock"
KuredNodeWasUnschedulableBeforeDrainAnnotation string = "kured.dev/node-unschedulable-before-drain"
// KuredRebootInProgressAnnotation is the canonical string value for the kured reboot-in-progress annotation
KuredRebootInProgressAnnotation string = "kured.dev/kured-reboot-in-progress"
// KuredMostRecentRebootNeededAnnotation is the canonical string value for the kured most-recent-reboot-needed annotation
@@ -112,7 +113,7 @@ func main() {
// flags are sorted alphabetically by type
flag.BoolVar(&alertFilterMatchOnly, "alert-filter-match-only", false, "Only block if the alert-filter-regexp matches active alerts")
flag.BoolVar(&alertFiringOnly, "alert-firing-only", false, "only consider firing alerts when checking for active alerts")
flag.BoolVar(&annotateNodes, "annotate-nodes", false, "if set, the annotations 'kured.dev/kured-reboot-in-progress' and 'kured.dev/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots")
flag.BoolVar(&annotateNodeProgress, "annotate-nodes", false, "if set, the annotations 'kured.dev/kured-reboot-in-progress' and 'kured.dev/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots")
flag.BoolVar(&forceReboot, "force-reboot", false, "force a reboot even if the drain fails or times out")
flag.DurationVar(&drainDelay, "drain-delay", 0, "delay drain for this duration (default: 0, disabled)")
flag.DurationVar(&drainTimeout, "drain-timeout", 0, "timeout after which the drain is aborted (default: 0, infinite time)")
@@ -199,8 +200,8 @@ func main() {

slog.Info(fmt.Sprintf("preferNoSchedule taint: (%s)", preferNoScheduleTaintName), "node", nodeID)

if annotateNodes {
slog.Info("Will annotate nodes during kured reboot operations", "node", nodeID)
if annotateNodeProgress {
slog.Info("Will annotate nodes progress during kured reboot operations", "node", nodeID)
}

rebooter, err := reboot.NewRebooter(rebootMethod, rebootCommand, rebootSignal, rebootDelay, true, 1)
@@ -390,6 +391,20 @@ func drain(client *kubernetes.Clientset, node *v1.Node, notifier notifications.N
Timeout: drainTimeout,
}

// Record the previous state of the node's Spec.Unschedulable in an annotation.
// If the annotation is already present, it means that a previous cordon or drain failed,
// so we should not reapply it: doing so could overwrite what the user had set
// (for example if the cordon succeeded but the drain failed).
if _, ok := node.Annotations[KuredNodeWasUnschedulableBeforeDrainAnnotation]; !ok {
// Store the state of the node before cordon changes it
annotations := map[string]string{KuredNodeWasUnschedulableBeforeDrainAnnotation: strconv.FormatBool(node.Spec.Unschedulable)}
// Annotate this node with its pre-drain schedulability so that it can be restored after the reboot
err := addNodeAnnotations(client, nodeID, annotations)
if err != nil {
return fmt.Errorf("error saving state of the node %s, %v", nodename, err)
}
}

if err := kubectldrain.RunCordonOrUncordon(drainer, node, true); err != nil {
return fmt.Errorf("error cordonning node %s, %v", nodename, err)
}
@@ -400,8 +415,30 @@ func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
return nil
}

func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
nodename := node.GetName()
func uncordon(client *kubernetes.Clientset, node *v1.Node, notifier notifications.Notifier) error {
// Revert cordon spec change with the help of node annotation
annotationContent, ok := node.Annotations[KuredNodeWasUnschedulableBeforeDrainAnnotation]
if !ok {
// If the annotation is absent, uncordon will not act.
// Do not uncordon if the previous state is unknown; it could bring nodes under maintenance back online!
return nil
}

wasUnschedulable, err := strconv.ParseBool(annotationContent)
if err != nil {
return fmt.Errorf("annotation was edited and cannot be converted back to bool %v, cannot uncordon (unrecoverable)", err)
}

if wasUnschedulable {
// Just delete the annotation and keep the node cordoned
err := deleteNodeAnnotation(client, nodeID, KuredNodeWasUnschedulableBeforeDrainAnnotation)
if err != nil {
return fmt.Errorf("error removing the WasUnschedulable annotation, keeping the node stuck in cordonned state forever %v", err)
}
return nil
}

nodeName := node.GetName()
kubectlStdOutLogger := &slogWriter{message: "uncordon: results", stream: "stdout"}
kubectlStdErrLogger := &slogWriter{message: "uncordon: results", stream: "stderr"}

@@ -412,11 +449,17 @@ func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
Ctx: context.Background(),
}
if err := kubectldrain.RunCordonOrUncordon(drainer, node, false); err != nil {
return fmt.Errorf("error uncordonning node %s: %v", nodename, err)
return fmt.Errorf("error uncordonning node %s: %v", nodeName, err)
} else if postRebootNodeLabels != nil {
err := updateNodeLabels(client, node, postRebootNodeLabels)
return fmt.Errorf("error updating node (%s) labels, needs manual intervention %v", nodename, err)
return fmt.Errorf("error updating node (%s) labels, needs manual intervention %v", nodeName, err)
}

err = deleteNodeAnnotation(client, nodeID, KuredNodeWasUnschedulableBeforeDrainAnnotation)
if err != nil {
return fmt.Errorf("error removing the WasUnschedulable annotation, keeping the node stuck in current state forever %v", err)
}
notifier.Send(fmt.Sprintf(messageTemplateUncordon, nodeID), "Node uncordonned successfully")
return nil
}

@@ -516,44 +559,13 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
continue
}

// Get lock data to know if need to uncordon the node
// to get the node back to its previous state
// TODO: Need to move to another method to check the current data of the lock relevant for this node
holding, lockData, err := lock.Holding()
err = uncordon(client, node, notifier)
if err != nil {
// Internal wiring, admin does not need to be aware unless debugging session
slog.Debug(fmt.Sprintf("Error checking lock - Not applying any action: %v.\nPlease check API", err), "node", nodeID, "error", err)
// Might be a transient API issue or a real problem. Inform the admin
slog.Info("unable to uncordon, needs investigation", "node", nodeID, "error", err)
continue
}

// we check if holding ONLY to know if lockData is valid.
// When moving to fetch lockData as a separate method, remove
// this whole condition.
// However, it means that Release()
// need to behave idempotently regardless or not the lock is
// held, but that's an ideal state.
// what we should simply do is reconcile the lock data
// with the node spec. But behind the scenes its a bug
// if it's not holding due to an error
if holding && !lockData.Metadata.Unschedulable {
// Split into two lines to remember I need to remove the first
// condition ;)
if node.Spec.Unschedulable != lockData.Metadata.Unschedulable && lockData.Metadata.Unschedulable == false {
// Important lifecycle event - admin should be aware
slog.Info("uncordoning node", "node", nodeID)
err = uncordon(client, node)
if err != nil {
// Transient API issue - no problem, will retry. No need to worry the admin for this
slog.Debug(fmt.Sprintf("Unable to uncordon %s: %v, will continue to hold lock and retry uncordon", node.GetName(), err), "node", nodeID, "error", err)
continue
}
// TODO, modify the actions to directly log and notify, instead of individual methods giving
// an incomplete view of the lifecycle
notifier.Send(fmt.Sprintf(messageTemplateUncordon, nodeID), "Node uncordonned")
}

}

// Releasing lock earlier is nice for other nodes
err = lock.Release()
if err != nil {
@@ -563,7 +575,7 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
}
// Regardless of whether we are holding the lock
// The node should not say it's still in progress if the reboot is done
if annotateNodes {
if annotateNodeProgress {
if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; ok {
// Who reads this? I hope nobody bothers outside real debug cases
slog.Debug(fmt.Sprintf("Deleting node %s annotation %s", nodeID, KuredRebootInProgressAnnotation), "node", nodeID)
@@ -578,6 +590,7 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
rebootRequiredGauge.WithLabelValues(nodeID).Set(1)

// Act on reboot required.

if !window.Contains(time.Now()) {
// Probably spamming outside the maintenance window. This should not be logged as info
slog.Debug("reboot required but outside maintenance window", "node", nodeID)
@@ -602,11 +615,11 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
slog.Debug("error retrieving node object via k8s API, retrying at next tick", "node", nodeID, "error", err)
continue
}
// nodeMeta contains the node Metadata that should be included in the lock
nodeMeta := daemonsetlock.NodeMeta{Unschedulable: node.Spec.Unschedulable}
//// nodeMeta contains the node Metadata that should be included in the lock
//nodeMeta := daemonsetlock.NodeMeta{Unschedulable: node.Spec.Unschedulable}

var timeNowString string
if annotateNodes {
if annotateNodeProgress {
if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; !ok {
timeNowString = time.Now().Format(time.RFC3339)
// Annotate this node to indicate that "I am going to be rebooted!"
@@ -624,27 +637,32 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
// Prefer not to schedule pods onto this node to avoid draining the same pod multiple times.
preferNoScheduleTaint.Enable()

// This could be merged into a single idempotent "Acquire" lock
holding, _, err := lock.Holding()
if err != nil {
// Not important to worry the admin
slog.Debug("error testing lock", "node", nodeID, "error", err)
holding, err := lock.Acquire()
if err != nil || !holding {
slog.Debug("error acquiring lock, will retry at next tick", "node", nodeID, "error", err)
continue
}

if !holding {
acquired, holder, err := lock.Acquire(nodeMeta)
if err != nil {
// Not important to worry the admin
slog.Debug("error acquiring lock, will retry at next tick", "node", nodeID, "error", err)
continue
}
if !acquired {
// Not very important - lock prevents action
slog.Debug(fmt.Sprintf("Lock already held by %v, will retry at next tick", holder), "node", nodeID)
continue
}
}
//// This could be merged into a single idempotent "Acquire" lock
//holding, _, err := lock.Holding()
//if err != nil {
// // Not important to worry the admin
// slog.Debug("error testing lock", "node", nodeID, "error", err)
// continue
//}
//
//if !holding {
// acquired, holder, err := lock.Acquire(nodeMeta)
// if err != nil {
// // Not important to worry the admin
// slog.Debug("error acquiring lock, will retry at next tick", "node", nodeID, "error", err)
// continue
// }
// if !acquired {
// // Not very important - lock prevents action
// slog.Debug(fmt.Sprintf("Lock already held by %v, will retry at next tick", holder), "node", nodeID)
// continue
// }
//}

err = drain(client, node, notifier)
if err != nil {
@@ -659,7 +677,10 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
// If shown, it helps understand the uncordoning. If the admin sees the node as cordoned
// after this, they need to take action (for example if the node was previously cordoned!)
slog.Info("Performing a best-effort uncordon after failed cordon and drain", "node", nodeID)
uncordon(client, node)
err := uncordon(client, node, notifier)
if err != nil {
slog.Info("Uncordon failed", "error", err)
}
continue
}
}