Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve tests #1021

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/on-pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ jobs:
- "TestE2EWithCommand"
- "TestE2EWithSignal"
- "TestE2EConcurrentWithCommand"
- "TestE2EConcurrentWithSignal"
kubernetes_version:
- "previous"
- "current"
Expand Down Expand Up @@ -126,6 +127,7 @@ jobs:
testname:
- "TestCordonningIsKept/concurrency1"
- "TestCordonningIsKept/concurrency2"
- "TestE2EBlocker/podblocker"
steps:
- uses: actions/checkout@v4
- name: Ensure go version
Expand All @@ -147,5 +149,9 @@ jobs:
with:
install_only: true
version: v0.22.0
# Keep this until v1.31 (or superior) becomes the default kubectl version for the kind-action.
# It is used in podblocker shell script test to use --all-pods.
# If the podblocker e2e test relies on another way, this can also be removed.
kubectl_version: v1.31.0
- name: Run specific e2e tests
run: make e2e-test ARGS="-run ^${{ matrix.testname }}"
run: make e2e-test ARGS="-run ^${{ matrix.testname }}"
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ bootstrap-tools: $(HACKDIR)
command -v $(HACKDIR)/cosign || curl -sSfL https://github.com/sigstore/cosign/releases/download/v2.2.3/cosign-linux-amd64 -o $(HACKDIR)/cosign
command -v $(HACKDIR)/shellcheck || (curl -sSfL https://github.com/koalaman/shellcheck/releases/download/stable/shellcheck-stable.linux.x86_64.tar.xz | tar -J -v -x shellcheck-stable/shellcheck && mv shellcheck-stable/shellcheck $(HACKDIR)/shellcheck && rmdir shellcheck-stable)
chmod +x $(HACKDIR)/goreleaser $(HACKDIR)/cosign $(HACKDIR)/syft $(HACKDIR)/shellcheck
# go install honnef.co/go/tools/cmd/staticcheck@latest
command -v staticcheck || go install honnef.co/go/tools/cmd/staticcheck@latest

clean:
rm -rf ./dist
Expand Down Expand Up @@ -47,9 +47,12 @@ dev-manifest:
sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' kured-ds.yaml > tests/kind/testfiles/kured-ds.yaml
# signal e2e scenario
sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' kured-ds-signal.yaml > tests/kind/testfiles/kured-ds-signal.yaml
# concurrency e2e scenario
sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' -e 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml > tests/kind/testfiles/kured-ds-concurrent.yaml

# concurrency e2e command scenario
sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' -e 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml > tests/kind/testfiles/kured-ds-concurrent-command.yaml
# concurrency e2e signal scenario
sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' -e 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds-signal.yaml > tests/kind/testfiles/kured-ds-concurrent-signal.yaml
# pod blocker e2e signal scenario
sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' -e 's/#\(.*\)--blocking-pod-selector=name=temperamental/\1--blocking-pod-selector=app=blocker/g' kured-ds-signal.yaml > tests/kind/testfiles/kured-ds-podblocker.yaml

e2e-test: dev-manifest dev-image
echo "Running ALL go tests"
Expand All @@ -68,4 +71,4 @@ test: bootstrap-tools
go test -test.short -json ./... > test.json
echo "Running shellcheck"
find . -name '*.sh' | xargs -n1 $(HACKDIR)/shellcheck
# Need to add staticcheck to replace golint as golint is deprecated, and staticcheck is the recommendation
staticcheck ./...
4 changes: 2 additions & 2 deletions cmd/kured/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,11 @@ func validateNotificationURL(notifyURL string, slackHookURL string) string {
log.Errorf("slack-hook-url is not properly formatted... no notification will be sent: %v\n", err)
return ""
}
if len(strings.Split(strings.Trim(parsedURL.Path, "/services/"), "/")) != 3 {
if len(strings.Split(strings.Replace(parsedURL.Path, "/services/", "", -1), "/")) != 3 {
log.Errorf("slack-hook-url is not properly formatted... no notification will be sent: unexpected number of / in URL\n")
return ""
}
return fmt.Sprintf("slack://%s", strings.Trim(parsedURL.Path, "/services/"))
return fmt.Sprintf("slack://%s", strings.Replace(parsedURL.Path, "/services/", "", -1))
}
return ""
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/daemonsetlock/daemonsetlock.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,10 @@ func (dsl *DaemonSetSingleLock) Release() error {
}

if value.NodeID != dsl.nodeID {
return fmt.Errorf("Not lock holder: %v", value.NodeID)
return fmt.Errorf("not lock holder: %v", value.NodeID)
}
} else {
return fmt.Errorf("Lock not held")
return fmt.Errorf("lock not held")
}

delete(ds.ObjectMeta.Annotations, dsl.annotation)
Expand Down
4 changes: 2 additions & 2 deletions pkg/timewindow/days.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ func parseWeekday(day string) (time.Weekday, error) {
if n >= 0 && n < 7 {
return time.Weekday(n), nil
}
return time.Sunday, fmt.Errorf("Invalid weekday, number out of range: %s", day)
return time.Sunday, fmt.Errorf("invalid weekday, number out of range: %s", day)
}

if weekday, ok := dayStrings[strings.ToLower(day)]; ok {
return weekday, nil
}
return time.Sunday, fmt.Errorf("Invalid weekday: %s", day)
return time.Sunday, fmt.Errorf("invalid weekday: %s", day)
}
2 changes: 1 addition & 1 deletion pkg/timewindow/timewindow.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,5 @@ func parseTime(s string, loc *time.Location) (time.Time, error) {
}
}

return time.Now(), fmt.Errorf("Invalid time format: %s", s)
return time.Now(), fmt.Errorf("invalid time format: %s", s)
}
119 changes: 106 additions & 13 deletions tests/kind/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func TestE2EWithCommand(t *testing.T) {
t.Run(version, func(t *testing.T) {
t.Parallel() // Allow tests to run in parallel

randomInt := fmt.Sprintf(strconv.Itoa(rand.Intn(100)))
randomInt := strconv.Itoa(rand.Intn(100))
kindClusterName := fmt.Sprintf("kured-e2e-command-%v-%v", version, randomInt)
kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-%v.yaml", version)
kindContext := fmt.Sprintf("kind-%v", kindClusterName)
Expand Down Expand Up @@ -202,7 +202,7 @@ func TestE2EWithSignal(t *testing.T) {
t.Run(version, func(t *testing.T) {
t.Parallel() // Allow tests to run in parallel

randomInt := fmt.Sprintf(strconv.Itoa(rand.Intn(100)))
randomInt := strconv.Itoa(rand.Intn(100))
kindClusterName := fmt.Sprintf("kured-e2e-signal-%v-%v", version, randomInt)
kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-%v.yaml", version)
kindContext := fmt.Sprintf("kind-%v", kindClusterName)
Expand Down Expand Up @@ -252,12 +252,12 @@ func TestE2EConcurrentWithCommand(t *testing.T) {
t.Run(version, func(t *testing.T) {
t.Parallel() // Allow tests to run in parallel

randomInt := fmt.Sprintf(strconv.Itoa(rand.Intn(100)))
randomInt := strconv.Itoa(rand.Intn(100))
kindClusterName := fmt.Sprintf("kured-e2e-concurrentcommand-%v-%v", version, randomInt)
kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-%v.yaml", version)
kindContext := fmt.Sprintf("kind-%v", kindClusterName)

k := NewKindTester(kindClusterName, kindClusterConfigFile, t, LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy("testfiles/kured-ds-concurrent.yaml"))
k := NewKindTester(kindClusterName, kindClusterConfigFile, t, LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy("testfiles/kured-ds-concurrent-command.yaml"))
defer k.FlushLog()

err := k.Create()
Expand All @@ -284,15 +284,16 @@ func TestE2EConcurrentWithCommand(t *testing.T) {
}
}

func TestCordonningIsKept(t *testing.T) {
func TestE2EConcurrentWithSignal(t *testing.T) {
t.Parallel()
if testing.Short() {
t.Skip("skipping test in short mode.")
}

var kindClusterConfigs = []string{
"concurrency1",
"concurrency2",
"previous",
"current",
"next",
}
// Iterate over each Kubernetes version
for _, version := range kindClusterConfigs {
Expand All @@ -301,16 +302,65 @@ func TestCordonningIsKept(t *testing.T) {
t.Run(version, func(t *testing.T) {
t.Parallel() // Allow tests to run in parallel

randomInt := fmt.Sprintf(strconv.Itoa(rand.Intn(100)))
kindClusterName := fmt.Sprintf("kured-e2e-cordon-%v-%v", version, randomInt)
kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-next.yaml")
randomInt := strconv.Itoa(rand.Intn(100))
kindClusterName := fmt.Sprintf("kured-e2e-concurrentsignal-%v-%v", version, randomInt)
kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-%v.yaml", version)
kindContext := fmt.Sprintf("kind-%v", kindClusterName)

k := NewKindTester(kindClusterName, kindClusterConfigFile, t, LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy("testfiles/kured-ds-concurrent-signal.yaml"))
defer k.FlushLog()

err := k.Create()
if err != nil {
t.Fatalf("Error creating cluster %v", err)
}
defer func(k *KindTest) {
err := k.Destroy()
if err != nil {
t.Fatalf("Error destroying cluster %v", err)
}
}(k)

k.Write([]byte("Now running e2e tests"))

if err := k.RunCmd("bash", "testfiles/create-reboot-sentinels.sh", kindContext); err != nil {
t.Fatalf("failed to create sentinels: %v", err)
}

if err := k.RunCmd("bash", "testfiles/follow-coordinated-reboot.sh", kindContext); err != nil {
t.Fatalf("failed to follow reboot: %v", err)
}
})
}
}

func TestCordonningIsKept(t *testing.T) {
t.Parallel()
if testing.Short() {
t.Skip("skipping test in short mode.")
}

var kindClusterConfigs = []string{
"concurrency1",
"concurrency2",
}
// Iterate over each test variant
for _, variant := range kindClusterConfigs {
variant := variant
// Define a subtest for each combination
t.Run(variant, func(t *testing.T) {
t.Parallel() // Allow tests to run in parallel

randomInt := strconv.Itoa(rand.Intn(100))
kindClusterName := fmt.Sprintf("kured-e2e-cordon-%v-%v", variant, randomInt)
kindClusterConfigFile := "../../.github/kind-cluster-next.yaml"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wonder why we're not iterating over all k8s versions for this test?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasted CPU? I am not expecting the behaviour to change across different versions of kubernetes. Hence testing the latest version sounds enough. That reasoning can be applied in most of the tests...

Do you feel we should simplify this?
Here is what I find necessary:

  • Test that the reboot works, whether we are using the signal or the command, regardless of the kubernetes version (hence, test all k8s versions).
  • Test that the concurrency works regardless of the kubernetes version or the reboot method used
  • Test that all advanced features work (blocking with pods, keeping the node cordonned, ...)

WDYT?

kindContext := fmt.Sprintf("kind-%v", kindClusterName)

var manifest string
if version == "concurrency1" {
manifest = fmt.Sprintf("testfiles/kured-ds.yaml")
if variant == "concurrency1" {
manifest = "testfiles/kured-ds-signal.yaml"
} else {
manifest = fmt.Sprintf("testfiles/kured-ds-concurrent.yaml")
manifest = "testfiles/kured-ds-concurrent-signal.yaml"
}
k := NewKindTester(kindClusterName, kindClusterConfigFile, t, LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy(manifest))
defer k.FlushLog()
Expand All @@ -334,3 +384,46 @@ func TestCordonningIsKept(t *testing.T) {
})
}
}
func TestE2EBlocker(t *testing.T) {
t.Parallel()
if testing.Short() {
t.Skip("skipping test in short mode.")
}

var kindClusterConfigs = []string{
"podblocker",
}
// Iterate over each variant of the test
for _, variant := range kindClusterConfigs {
variant := variant
// Define a subtest for each combination
t.Run(variant, func(t *testing.T) {
t.Parallel() // Allow tests to run in parallel

randomInt := strconv.Itoa(rand.Intn(100))
kindClusterName := fmt.Sprintf("kured-e2e-cordon-%v-%v", variant, randomInt)
kindClusterConfigFile := "../../.github/kind-cluster-next.yaml"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto above comment

kindContext := fmt.Sprintf("kind-%v", kindClusterName)

k := NewKindTester(kindClusterName, kindClusterConfigFile, t, LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy(fmt.Sprintf("testfiles/kured-ds-%v.yaml", variant)))
defer k.FlushLog()

err := k.Create()
if err != nil {
t.Fatalf("Error creating cluster %v", err)
}
defer func(k *KindTest) {
err := k.Destroy()
if err != nil {
t.Fatalf("Error destroying cluster %v", err)
}
}(k)

k.Write([]byte("Now running e2e tests"))

if err := k.RunCmd("bash", fmt.Sprintf("testfiles/%v.sh", variant), kindContext); err != nil {
t.Fatalf("node blocker test did not succeed: %v", err)
}
})
}
}
54 changes: 54 additions & 0 deletions tests/kind/testfiles/podblocker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bash

kubectl_flags=( )
[[ "$1" != "" ]] && kubectl_flags=("${kubectl_flags[@]}" --context "$1")

function gather_logs_and_cleanup {
for id in $(docker ps -q); do
echo "############################################################"
echo "docker logs for container $id:"
docker logs "$id"
done
${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" logs ds/kured --all-pods -n kube-system
}
trap gather_logs_and_cleanup EXIT

set +o errexit
worker=$(${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" get nodes -o custom-columns=name:metadata.name --no-headers | grep worker | head -n 1)

${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" label nodes "$worker" blocked-host=yes

${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" apply -f - << EOF
apiVersion: v1
kind: Pod
metadata:
name: nginx
labels:
app: blocker
spec:
containers:
- name: nginx
image: nginx
imagePullPolicy: IfNotPresent
nodeSelector:
blocked-host: "yes"
EOF

docker exec "$worker" touch "${SENTINEL_FILE:-/var/run/reboot-required}"

set -o errexit
max_attempts="100"
attempt_num=1
sleep_time=5

until ${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" logs ds/kured --all-pods -n kube-system | grep -i -e "Reboot.*blocked"
do
if (( attempt_num == max_attempts )); then
echo "Attempt $attempt_num failed and there are no more attempts left!"
exit 1
else
echo "Did not find 'reboot blocked' in the log, retrying in $sleep_time seconds (Attempt #$attempt_num)"
sleep "$sleep_time"
fi
(( attempt_num++ ))
done