From 1aee6b28d3667ce35faf187a06a7676df21aa6f3 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Evrard
Date: Mon, 11 Nov 2024 13:08:57 +0100
Subject: [PATCH] Use node annotation for previous state

Without this, we use the daemonset lock to annotate the previous state
of the node. This is unnecessary, as we already have rights on the node
itself. It is also a problem, as it makes the whole lock model more
complex and prevents other lock implementations.

This fixes it by adapting the drain/uncordon functions to rely on node
annotations, introducing a new annotation named
"kured.dev/node-unschedulable-before-drain".

Signed-off-by: Jean-Philippe Evrard
---
 cmd/kured/main.go                            | 155 +++++++++-------
 internal/daemonsetlock/daemonsetlock.go      | 186 +++++++++----------
 internal/daemonsetlock/daemonsetlock_test.go |   4 +-
 3 files changed, 183 insertions(+), 162 deletions(-)

diff --git a/cmd/kured/main.go b/cmd/kured/main.go
index ef60f78f7..4846c6f76 100644
--- a/cmd/kured/main.go
+++ b/cmd/kured/main.go
@@ -39,7 +39,7 @@ var (
 	alertFilter          regexpValue
 	alertFilterMatchOnly bool
 	alertFiringOnly      bool
-	annotateNodes        bool
+	annotateNodeProgress bool
 	concurrency          int
 	drainDelay           time.Duration
 	drainGracePeriod     int
@@ -92,7 +92,8 @@ var (
 
 const (
 	// KuredNodeLockAnnotation is the canonical string value for the kured node-lock annotation
-	KuredNodeLockAnnotation string = "kured.dev/kured-node-lock"
+	KuredNodeLockAnnotation                        string = "kured.dev/kured-node-lock"
+	KuredNodeWasUnschedulableBeforeDrainAnnotation string = "kured.dev/node-unschedulable-before-drain"
 	// KuredRebootInProgressAnnotation is the canonical string value for the kured reboot-in-progress annotation
 	KuredRebootInProgressAnnotation string = "kured.dev/kured-reboot-in-progress"
 	// KuredMostRecentRebootNeededAnnotation is the canonical string value for the kured most-recent-reboot-needed annotation
@@ -112,7 +113,7 @@ func main() {
 	// flags are sorted alphabetically by type
 	flag.BoolVar(&alertFilterMatchOnly, "alert-filter-match-only", false, "Only block if the alert-filter-regexp matches active alerts")
 	flag.BoolVar(&alertFiringOnly, "alert-firing-only", false, "only consider firing alerts when checking for active alerts")
-	flag.BoolVar(&annotateNodes, "annotate-nodes", false, "if set, the annotations 'kured.dev/kured-reboot-in-progress' and 'kured.dev/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots")
+	flag.BoolVar(&annotateNodeProgress, "annotate-nodes", false, "if set, the annotations 'kured.dev/kured-reboot-in-progress' and 'kured.dev/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots")
 	flag.BoolVar(&forceReboot, "force-reboot", false, "force a reboot even if the drain fails or times out")
 	flag.DurationVar(&drainDelay, "drain-delay", 0, "delay drain for this duration (default: 0, disabled)")
 	flag.DurationVar(&drainTimeout, "drain-timeout", 0, "timeout after which the drain is aborted (default: 0, infinite time)")
@@ -199,8 +200,8 @@ func main() {
 
 	slog.Info(fmt.Sprintf("preferNoSchedule taint: (%s)", preferNoScheduleTaintName), "node", nodeID)
 
-	if annotateNodes {
-		slog.Info("Will annotate nodes during kured reboot operations", "node", nodeID)
+	if annotateNodeProgress {
+		slog.Info("Will annotate node progress during kured reboot operations", "node", nodeID)
 	}
 
 	rebooter, err := reboot.NewRebooter(rebootMethod, rebootCommand, rebootSignal, rebootDelay, true, 1)
@@ -390,6 +391,20 @@ func drain(client *kubernetes.Clientset, node *v1.Node, notifier notifications.N
 		Timeout:             drainTimeout,
 	}
 
+	// Add the previous state of the node (Spec.Unschedulable) into an annotation.
+	// If the annotation is already present, it means that a previous cordon or drain failed;
+	// in that case do not reapply it: it might override what the user has set
+	// (for example if the cordon succeeded but the drain failed).
+	if _, ok := node.Annotations[KuredNodeWasUnschedulableBeforeDrainAnnotation]; !ok {
+		// Store the state of the node before cordon changes it
+		annotations := map[string]string{KuredNodeWasUnschedulableBeforeDrainAnnotation: strconv.FormatBool(node.Spec.Unschedulable)}
+		// This lets uncordon know whether the node was already unschedulable before kured acted
+		err := addNodeAnnotations(client, nodeID, annotations)
+		if err != nil {
+			return fmt.Errorf("error saving state of the node %s, %v", nodename, err)
+		}
+	}
+
 	if err := kubectldrain.RunCordonOrUncordon(drainer, node, true); err != nil {
 		return fmt.Errorf("error cordonning node %s, %v", nodename, err)
 	}
@@ -400,8 +415,30 @@ func drain(client *kubernetes.Clientset, node *v1.Node, notifier notifications.N
 	return nil
 }
 
-func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
-	nodename := node.GetName()
+func uncordon(client *kubernetes.Clientset, node *v1.Node, notifier notifications.Notifier) error {
+	// Revert the cordon spec change with the help of the node annotation
+	annotationContent, ok := node.Annotations[KuredNodeWasUnschedulableBeforeDrainAnnotation]
+	if !ok {
+		// If the annotation is absent, uncordon will not act.
+		// Do not uncordon if the previous state is unknown: it could bring a node under maintenance back online!
+		return nil
+	}
+
+	wasUnschedulable, err := strconv.ParseBool(annotationContent)
+	if err != nil {
+		return fmt.Errorf("annotation was edited and cannot be parsed back to bool (%v), cannot uncordon (unrecoverable)", err)
+	}
+
+	if wasUnschedulable {
+		// Just delete the annotation and keep the node cordoned
+		err := deleteNodeAnnotation(client, nodeID, KuredNodeWasUnschedulableBeforeDrainAnnotation)
+		if err != nil {
+			return fmt.Errorf("error removing the WasUnschedulable annotation, keeping the node stuck in cordonned state forever: %v", err)
+		}
+		return nil
+	}
+
+	nodeName := node.GetName()
 	kubectlStdOutLogger := &slogWriter{message: "uncordon: results", stream: "stdout"}
 	kubectlStdErrLogger := &slogWriter{message: "uncordon: results", stream: "stderr"}
 
@@ -412,11 +449,17 @@ func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
 		Ctx:    context.Background(),
 	}
 	if err := kubectldrain.RunCordonOrUncordon(drainer, node, false); err != nil {
-		return fmt.Errorf("error uncordonning node %s: %v", nodename, err)
+		return fmt.Errorf("error uncordonning node %s: %v", nodeName, err)
 	} else if postRebootNodeLabels != nil {
 		err := updateNodeLabels(client, node, postRebootNodeLabels)
-		return fmt.Errorf("error updating node (%s) labels, needs manual intervention %v", nodename, err)
+		return fmt.Errorf("error updating node (%s) labels, needs manual intervention %v", nodeName, err)
 	}
+
+	err = deleteNodeAnnotation(client, nodeID, KuredNodeWasUnschedulableBeforeDrainAnnotation)
+	if err != nil {
+		return fmt.Errorf("error removing the WasUnschedulable annotation, keeping the node stuck in current state forever: %v", err)
+	}
+	notifier.Send(fmt.Sprintf(messageTemplateUncordon, nodeID), "Node uncordonned successfully")
 	return nil
 }
 
@@ -516,44 +559,13 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
 			continue
 		}
 
-		// Get lock data to know if need to uncordon the node
-		// to get the node back to its previous state
-		// TODO: Need to move to another method to check the current data of the lock relevant for this node
-		holding, lockData, err := lock.Holding()
+		err = uncordon(client, node, notifier)
 		if err != nil {
-			// Internal wiring, admin does not need to be aware unless debugging session
-			slog.Debug(fmt.Sprintf("Error checking lock - Not applying any action: %v.\nPlease check API", err), "node", nodeID, "error", err)
+			// Might be a transient API issue or a real problem. Inform the admin
+			slog.Info("unable to uncordon, needs investigation", "node", nodeID, "error", err)
 			continue
 		}
-		// we check if holding ONLY to know if lockData is valid.
-		// When moving to fetch lockData as a separate method, remove
-		// this whole condition.
-		// However, it means that Release()
-		// need to behave idempotently regardless or not the lock is
-		// held, but that's an ideal state.
-		// what we should simply do is reconcile the lock data
-		// with the node spec. But behind the scenes its a bug
-		// if it's not holding due to an error
-		if holding && !lockData.Metadata.Unschedulable {
-			// Split into two lines to remember I need to remove the first
-			// condition ;)
-			if node.Spec.Unschedulable != lockData.Metadata.Unschedulable && lockData.Metadata.Unschedulable == false {
-				// Important lifecycle event - admin should be aware
-				slog.Info("uncordoning node", "node", nodeID)
-				err = uncordon(client, node)
-				if err != nil {
-					// Transient API issue - no problem, will retry. No need to worry the admin for this
-					slog.Debug(fmt.Sprintf("Unable to uncordon %s: %v, will continue to hold lock and retry uncordon", node.GetName(), err), "node", nodeID, "error", err)
-					continue
-				}
-				// TODO, modify the actions to directly log and notify, instead of individual methods giving
-				// an incomplete view of the lifecycle
-				notifier.Send(fmt.Sprintf(messageTemplateUncordon, nodeID), "Node uncordonned")
-			}
-
-		}
-
 		// Releasing lock earlier is nice for other nodes
 		err = lock.Release()
 		if err != nil {
@@ -563,7 +575,7 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
 		}
 		// Regardless or not we are holding the lock
 		// The node should not say it's still in progress if the reboot is done
-		if annotateNodes {
+		if annotateNodeProgress {
 			if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; ok {
 				// Who reads this? I hope nobody bothers outside real debug cases
 				slog.Debug(fmt.Sprintf("Deleting node %s annotation %s", nodeID, KuredRebootInProgressAnnotation), "node", nodeID)
@@ -578,6 +590,7 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
 		rebootRequiredGauge.WithLabelValues(nodeID).Set(1)
 
 		// Act on reboot required.
+
 		if !window.Contains(time.Now()) {
 			// Probably spamming outside the maintenance window. This should not be logged as info
 			slog.Debug("reboot required but outside maintenance window", "node", nodeID)
@@ -602,11 +615,11 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
slog.Debug("error retrieving node object via k8s API, retrying at next tick", "node", nodeID, "error", err) continue } - // nodeMeta contains the node Metadata that should be included in the lock - nodeMeta := daemonsetlock.NodeMeta{Unschedulable: node.Spec.Unschedulable} + //// nodeMeta contains the node Metadata that should be included in the lock + //nodeMeta := daemonsetlock.NodeMeta{Unschedulable: node.Spec.Unschedulable} var timeNowString string - if annotateNodes { + if annotateNodeProgress { if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; !ok { timeNowString = time.Now().Format(time.RFC3339) // Annotate this node to indicate that "I am going to be rebooted!" @@ -624,27 +637,32 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers. // Prefer to not schedule pods onto this node to avoid draing the same pod multiple times. preferNoScheduleTaint.Enable() - // This could be merged into a single idempotent "Acquire" lock - holding, _, err := lock.Holding() - if err != nil { - // Not important to worry the admin - slog.Debug("error testing lock", "node", nodeID, "error", err) + holding, err := lock.Acquire() + if err != nil || !holding { + slog.Debug("error acquiring lock, will retry at next tick", "node", nodeID, "error", err) continue } - - if !holding { - acquired, holder, err := lock.Acquire(nodeMeta) - if err != nil { - // Not important to worry the admin - slog.Debug("error acquiring lock, will retry at next tick", "node", nodeID, "error", err) - continue - } - if !acquired { - // Not very important - lock prevents action - slog.Debug(fmt.Sprintf("Lock already held by %v, will retry at next tick", holder), "node", nodeID) - continue - } - } + //// This could be merged into a single idempotent "Acquire" lock + //holding, _, err := lock.Holding() + //if err != nil { + // // Not important to worry the admin + // slog.Debug("error testing lock", "node", nodeID, "error", err) + // continue + //} + // + //if !holding { + // acquired, holder, err := lock.Acquire(nodeMeta) + // if err != nil { + // // Not important to worry the admin + // slog.Debug("error acquiring lock, will retry at next tick", "node", nodeID, "error", err) + // continue + // } + // if !acquired { + // // Not very important - lock prevents action + // slog.Debug(fmt.Sprintf("Lock already held by %v, will retry at next tick", holder), "node", nodeID) + // continue + // } + //} err = drain(client, node, notifier) if err != nil { @@ -659,7 +677,10 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers. // If shown, it is helping understand the uncordonning. If the admin seems the node as cordonned // with this, it needs to take action (for example if the node was previously cordonned!) 
slog.Info("Performing a best-effort uncordon after failed cordon and drain", "node", nodeID) - uncordon(client, node) + err := uncordon(client, node, notifier) + if err != nil { + slog.Info("Uncordon failed", "error", err) + } continue } } diff --git a/internal/daemonsetlock/daemonsetlock.go b/internal/daemonsetlock/daemonsetlock.go index 54a698613..3df0aab13 100644 --- a/internal/daemonsetlock/daemonsetlock.go +++ b/internal/daemonsetlock/daemonsetlock.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "log/slog" - "strings" "time" v1 "k8s.io/api/apps/v1" @@ -21,9 +20,8 @@ const ( ) type Lock interface { - Acquire(NodeMeta) (bool, string, error) + Acquire() (bool, error) Release() error - Holding() (bool, LockAnnotationValue, error) } type GenericLock struct { @@ -62,9 +60,7 @@ type DaemonSetMultiLock struct { } // LockAnnotationValue contains the lock data, -// which allows persistence across reboots, particularily recording if the -// node was already unschedulable before kured reboot. -// To be modified when using another type of lock storage. +// Metadata is there for migrations reasons. Ignore this as much as possible type LockAnnotationValue struct { NodeID string `json:"nodeID"` Metadata NodeMeta `json:"metadata,omitempty"` @@ -128,32 +124,32 @@ func (dsl *DaemonSetLock) GetDaemonSet(sleep, timeout time.Duration) (*v1.Daemon } // Acquire attempts to annotate the kured daemonset with lock info from instantiated DaemonSetLock using client-go -func (dsl *DaemonSetSingleLock) Acquire(nodeMetadata NodeMeta) (bool, string, error) { +func (dsl *DaemonSetSingleLock) Acquire() (bool, error) { for { ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout) if err != nil { - return false, "", fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err) + return false, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err) } valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation] if exists { value := LockAnnotationValue{} if err := json.Unmarshal([]byte(valueString), &value); err != nil { - return false, "", err + return false, err } if !ttlExpired(value.Created, value.TTL) { - return value.NodeID == dsl.nodeID, value.NodeID, nil + return value.NodeID == dsl.nodeID, nil } } if ds.ObjectMeta.Annotations == nil { ds.ObjectMeta.Annotations = make(map[string]string) } - value := LockAnnotationValue{NodeID: dsl.nodeID, Metadata: nodeMetadata, Created: time.Now().UTC(), TTL: dsl.TTL} + value := LockAnnotationValue{NodeID: dsl.nodeID, Created: time.Now().UTC(), TTL: dsl.TTL} valueBytes, err := json.Marshal(&value) if err != nil { - return false, "", err + return false, err } ds.ObjectMeta.Annotations[dsl.annotation] = string(valueBytes) @@ -164,35 +160,36 @@ func (dsl *DaemonSetSingleLock) Acquire(nodeMetadata NodeMeta) (bool, string, er time.Sleep(time.Second) continue } else { - return false, "", err + return false, err } } - return true, dsl.nodeID, nil + return true, nil } } -// Test attempts to check the kured daemonset lock status (existence, expiry) from instantiated DaemonSetLock using client-go -func (dsl *DaemonSetSingleLock) Holding() (bool, LockAnnotationValue, error) { - var lockData LockAnnotationValue - ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout) - if err != nil { - return false, lockData, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err) - } - - valueString, exists := 
-	if exists {
-		value := LockAnnotationValue{}
-		if err := json.Unmarshal([]byte(valueString), &value); err != nil {
-			return false, lockData, err
-		}
-
-		if !ttlExpired(value.Created, value.TTL) {
-			return value.NodeID == dsl.nodeID, value, nil
-		}
-	}
-
-	return false, lockData, nil
-}
+//
+//// Test attempts to check the kured daemonset lock status (existence, expiry) from instantiated DaemonSetLock using client-go
+//func (dsl *DaemonSetSingleLock) Holding() (bool, LockAnnotationValue, error) {
+//	var lockData LockAnnotationValue
+//	ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
+//	if err != nil {
+//		return false, lockData, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
+//	}
+//
+//	valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
+//	if exists {
+//		value := LockAnnotationValue{}
+//		if err := json.Unmarshal([]byte(valueString), &value); err != nil {
+//			return false, lockData, err
+//		}
+//
+//		if !ttlExpired(value.Created, value.TTL) {
+//			return value.NodeID == dsl.nodeID, value, nil
+//		}
+//	}
+//
+//	return false, lockData, nil
+//}
 
 // Release attempts to remove the lock data from the kured ds annotations using client-go
 func (dsl *DaemonSetSingleLock) Release() error {
@@ -207,17 +204,18 @@ func (dsl *DaemonSetSingleLock) Release() error {
 	}
 
 	valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
-	if exists {
-		value := LockAnnotationValue{}
-		if err := json.Unmarshal([]byte(valueString), &value); err != nil {
-			return err
-		}
-		if value.NodeID != dsl.nodeID {
-			return fmt.Errorf("not lock holder: %v", value.NodeID)
-		}
-	} else {
-		return fmt.Errorf("lock not held")
+	if !exists {
+		return nil
+	}
+
+	value := LockAnnotationValue{}
+	if err := json.Unmarshal([]byte(valueString), &value); err != nil {
+		return err
+	}
+
+	if value.NodeID != dsl.nodeID {
+		return fmt.Errorf("not lock holder: %v", value.NodeID)
 	}
 
 	delete(ds.ObjectMeta.Annotations, dsl.annotation)
@@ -243,15 +241,15 @@ func ttlExpired(created time.Time, ttl time.Duration) bool {
 	return false
 }
 
-func nodeIDsFromMultiLock(annotation multiLockAnnotationValue) []string {
-	nodeIDs := make([]string, 0, len(annotation.LockAnnotations))
-	for _, nodeLock := range annotation.LockAnnotations {
-		nodeIDs = append(nodeIDs, nodeLock.NodeID)
-	}
-	return nodeIDs
-}
+//func nodeIDsFromMultiLock(annotation multiLockAnnotationValue) []string {
+//	nodeIDs := make([]string, 0, len(annotation.LockAnnotations))
+//	for _, nodeLock := range annotation.LockAnnotations {
+//		nodeIDs = append(nodeIDs, nodeLock.NodeID)
+//	}
+//	return nodeIDs
+//}
 
-func (dsl *DaemonSetLock) canAcquireMultiple(annotation multiLockAnnotationValue, metadata NodeMeta, TTL time.Duration, maxOwners int) (bool, multiLockAnnotationValue) {
+func (dsl *DaemonSetLock) canAcquireMultiple(annotation multiLockAnnotationValue, TTL time.Duration, maxOwners int) (bool, multiLockAnnotationValue) {
 	newAnnotation := multiLockAnnotationValue{MaxOwners: maxOwners}
 	freeSpace := false
 	if annotation.LockAnnotations == nil || len(annotation.LockAnnotations) < maxOwners {
@@ -274,10 +272,9 @@ func (dsl *DaemonSetLock) canAcquireMultiple(annotation multiLockAnnotationValue
 		newAnnotation.LockAnnotations = append(
 			newAnnotation.LockAnnotations,
 			LockAnnotationValue{
-				NodeID:   dsl.nodeID,
-				Metadata: metadata,
-				Created:  time.Now().UTC(),
-				TTL:      TTL,
+				NodeID:  dsl.nodeID,
+				Created: time.Now().UTC(),
+				TTL:     TTL,
 			},
 		)
 		return true, newAnnotation
@@ -287,24 +284,24 @@ func (dsl *DaemonSetLock) canAcquireMultiple(annotation multiLockAnnotationValue
 }
 
 // Acquire creates and annotates the daemonset with a multiple owner lock
-func (dsl *DaemonSetMultiLock) Acquire(nodeMetaData NodeMeta) (bool, string, error) {
+func (dsl *DaemonSetMultiLock) Acquire() (bool, error) {
 	for {
 		ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
 		if err != nil {
-			return false, "", fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
+			return false, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
 		}
 
 		annotation := multiLockAnnotationValue{}
 		valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
 		if exists {
 			if err := json.Unmarshal([]byte(valueString), &annotation); err != nil {
-				return false, "", fmt.Errorf("error getting multi lock: %w", err)
+				return false, fmt.Errorf("error getting multi lock: %w", err)
 			}
 		}
 
-		lockPossible, newAnnotation := dsl.canAcquireMultiple(annotation, nodeMetaData, dsl.TTL, dsl.maxOwners)
+		lockPossible, newAnnotation := dsl.canAcquireMultiple(annotation, dsl.TTL, dsl.maxOwners)
 		if !lockPossible {
-			return false, strings.Join(nodeIDsFromMultiLock(newAnnotation), ","), nil
+			return false, nil
 		}
 
 		if ds.ObjectMeta.Annotations == nil {
@@ -312,7 +309,7 @@ func (dsl *DaemonSetMultiLock) Acquire(nodeMetaData NodeMeta) (bool, string, err
 		}
 		newAnnotationBytes, err := json.Marshal(&newAnnotation)
 		if err != nil {
-			return false, "", fmt.Errorf("error marshalling new annotation lock: %w", err)
+			return false, fmt.Errorf("error marshalling new annotation lock: %w", err)
 		}
 
 		ds.ObjectMeta.Annotations[dsl.annotation] = string(newAnnotationBytes)
@@ -322,37 +319,38 @@ func (dsl *DaemonSetMultiLock) Acquire(nodeMetaData NodeMeta) (bool, string, err
 				time.Sleep(time.Second)
 				continue
 			} else {
-				return false, "", fmt.Errorf("error updating daemonset with multi lock: %w", err)
+				return false, fmt.Errorf("error updating daemonset with multi lock: %w", err)
 			}
 		}
 
-		return true, strings.Join(nodeIDsFromMultiLock(newAnnotation), ","), nil
+		return true, nil
 	}
 }
 
-// TestMultiple attempts to check the kured daemonset lock status for multi locks
-func (dsl *DaemonSetMultiLock) Holding() (bool, LockAnnotationValue, error) {
-	var lockdata LockAnnotationValue
-	ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
-	if err != nil {
-		return false, lockdata, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
-	}
-
-	valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
-	if exists {
-		value := multiLockAnnotationValue{}
-		if err := json.Unmarshal([]byte(valueString), &value); err != nil {
-			return false, lockdata, err
-		}
-
-		for _, nodeLock := range value.LockAnnotations {
-			if nodeLock.NodeID == dsl.nodeID && !ttlExpired(nodeLock.Created, nodeLock.TTL) {
-				return true, nodeLock, nil
-			}
-		}
-	}
-
-	return false, lockdata, nil
-}
+//
+//// TestMultiple attempts to check the kured daemonset lock status for multi locks
+//func (dsl *DaemonSetMultiLock) Holding() (bool, LockAnnotationValue, error) {
+//	var lockdata LockAnnotationValue
+//	ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
+//	if err != nil {
+//		return false, lockdata, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
+//	}
+//
+//	valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
+//	if exists {
+//		value := multiLockAnnotationValue{}
+//		if err := json.Unmarshal([]byte(valueString), &value); err != nil {
+//			return false, lockdata, err
+//		}
+//
+//		for _, nodeLock := range value.LockAnnotations {
+//			if nodeLock.NodeID == dsl.nodeID && !ttlExpired(nodeLock.Created, nodeLock.TTL) {
+//				return true, nodeLock, nil
+//			}
+//		}
+//	}
+//
+//	return false, lockdata, nil
+//}
 
 // Release attempts to remove the lock data for a single node from the multi node annotation
 func (dsl *DaemonSetMultiLock) Release() error {
@@ -382,9 +380,11 @@ func (dsl *DaemonSetMultiLock) Release() error {
 			}
 		}
 	}
-
-	if !exists || !modified {
-		return fmt.Errorf("Lock not held")
+	if !exists {
+		return nil
+	}
+	if !modified {
+		return fmt.Errorf("lock not held")
 	}
 
 	newAnnotationBytes, err := json.Marshal(value)
diff --git a/internal/daemonsetlock/daemonsetlock_test.go b/internal/daemonsetlock/daemonsetlock_test.go
index f68a81e28..4b931776e 100644
--- a/internal/daemonsetlock/daemonsetlock_test.go
+++ b/internal/daemonsetlock/daemonsetlock_test.go
@@ -183,10 +183,10 @@ func TestCanAcquireMultiple(t *testing.T) {
 			lockPossible: true,
 		},
 	}
-	nm := NodeMeta{Unschedulable: false}
+
 	for _, testCase := range testCases {
 		t.Run(testCase.name, func(t *testing.T) {
-			lockPossible, actual := testCase.daemonSetLock.canAcquireMultiple(testCase.current, nm, time.Minute, testCase.maxOwners)
+			lockPossible, actual := testCase.daemonSetLock.canAcquireMultiple(testCase.current, time.Minute, testCase.maxOwners)
 			if lockPossible != testCase.lockPossible {
 				t.Fatalf(
 					"unexpected result for lock possible (got %t expected %t new annotation %v",
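
For context, below is a minimal, self-contained sketch of the annotation round-trip this patch relies on: record node.Spec.Unschedulable in "kured.dev/node-unschedulable-before-drain" before cordoning, then read it back and clear it when deciding whether to uncordon. It is illustrative only; the helper names and the use of a JSON merge patch are assumptions, not code from the patch (the actual change goes through kured's existing addNodeAnnotations/deleteNodeAnnotation helpers and the kubectl drain helpers).

package annotationsketch

import (
	"context"
	"encoding/json"
	"fmt"
	"strconv"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

const unschedulableBeforeDrain = "kured.dev/node-unschedulable-before-drain"

// saveUnschedulableState records the current Spec.Unschedulable value in a node
// annotation before cordoning, so a later uncordon can restore the previous state.
// It only writes the annotation if it is not already present, to avoid overwriting
// the state captured by an earlier (failed) drain attempt.
func saveUnschedulableState(ctx context.Context, client kubernetes.Interface, nodeName string) error {
	node, err := client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	if _, ok := node.Annotations[unschedulableBeforeDrain]; ok {
		return nil // already recorded by a previous attempt
	}
	patch := map[string]interface{}{
		"metadata": map[string]interface{}{
			"annotations": map[string]string{
				unschedulableBeforeDrain: strconv.FormatBool(node.Spec.Unschedulable),
			},
		},
	}
	data, err := json.Marshal(patch)
	if err != nil {
		return err
	}
	_, err = client.CoreV1().Nodes().Patch(ctx, nodeName, types.MergePatchType, data, metav1.PatchOptions{})
	return err
}

// restoreUnschedulableState reads the annotation back after the reboot and removes it.
// The caller should only uncordon when the returned value is false: if the node was
// already unschedulable before kured acted, the cordon must be left in place.
func restoreUnschedulableState(ctx context.Context, client kubernetes.Interface, nodeName string) (bool, error) {
	node, err := client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return false, err
	}
	raw, ok := node.Annotations[unschedulableBeforeDrain]
	if !ok {
		return false, fmt.Errorf("annotation %s not found on node %s", unschedulableBeforeDrain, nodeName)
	}
	wasUnschedulable, err := strconv.ParseBool(raw)
	if err != nil {
		return false, err
	}
	// A JSON merge patch with a null value deletes the annotation key.
	data := []byte(fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}}`, unschedulableBeforeDrain))
	_, err = client.CoreV1().Nodes().Patch(ctx, nodeName, types.MergePatchType, data, metav1.PatchOptions{})
	return wasUnschedulable, err
}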