From bba74e78b74419030d691328119c59b068550d28 Mon Sep 17 00:00:00 2001
From: Akshay Iyyadurai Balasundaram
Date: Wed, 23 Oct 2024 07:19:09 +0200
Subject: [PATCH 1/2] feat(alerts): Add alert for failing Helm Chart tests

Signed-off-by: Akshay Iyyadurai Balasundaram
---
 charts/manager/alerts/operator.alerts | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/charts/manager/alerts/operator.alerts b/charts/manager/alerts/operator.alerts
index 12ff26d7a..515978bf7 100644
--- a/charts/manager/alerts/operator.alerts
+++ b/charts/manager/alerts/operator.alerts
@@ -28,6 +28,15 @@ groups:
         for: 15m
         labels:
           severity: warning
+      - alert: GreenhousePluginHelmChartTestFailures
+        expr: |
+          sum by(plugin, cluster, namespace)(rate(greenhouse_plugin_chart_test_runs_total{result="Error"}[30m])) > 0
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Helm Chart test failing for plugin {{ $labels.plugin }}"
+          description: "Helm Chart test for plugin {{ $labels.plugin }} in namespace {{ $labels.namespace }} on cluster {{ $labels.cluster }} has been failing for the last 15 minutes"
   - name: greenhouse-webhooks.rules
     rules:
       - alert: GreenhouseWebhookLatencyHigh

From 3869b3f3d7de8beef50c6c10575a12d1d5272429 Mon Sep 17 00:00:00 2001
From: Akshay Iyyadurai Balasundaram
Date: Wed, 23 Oct 2024 11:59:44 +0200
Subject: [PATCH 2/2] feat(alerts): Update Helm Chart test duration in operator.alerts

Signed-off-by: Akshay Iyyadurai Balasundaram
---
 charts/manager/alerts/operator.alerts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/charts/manager/alerts/operator.alerts b/charts/manager/alerts/operator.alerts
index 515978bf7..8f8165f09 100644
--- a/charts/manager/alerts/operator.alerts
+++ b/charts/manager/alerts/operator.alerts
@@ -30,13 +30,13 @@ groups:
           severity: warning
       - alert: GreenhousePluginHelmChartTestFailures
         expr: |
-          sum by(plugin, cluster, namespace)(rate(greenhouse_plugin_chart_test_runs_total{result="Error"}[30m])) > 0
-        for: 15m
+          sum by(plugin, cluster, namespace)(rate(greenhouse_plugin_chart_test_runs_total{result="Error"}[15m])) > 0
+        for: 30m
         labels:
           severity: warning
         annotations:
           summary: "Helm Chart test failing for plugin {{ $labels.plugin }}"
-          description: "Helm Chart test for plugin {{ $labels.plugin }} in namespace {{ $labels.namespace }} on cluster {{ $labels.cluster }} has been failing for the last 15 minutes"
+          description: "Helm Chart test for plugin {{ $labels.plugin }} in namespace {{ $labels.namespace }} on cluster {{ $labels.cluster }} has been failing for the last 30 minutes"
   - name: greenhouse-webhooks.rules
     rules:
       - alert: GreenhouseWebhookLatencyHigh