From 3c62994db7b3c1838141990063071313e35491b3 Mon Sep 17 00:00:00 2001 From: "Bryan \"Beege\" Berry" Date: Sun, 11 Aug 2024 19:21:53 +0900 Subject: [PATCH 1/2] fix: Bad PrometheusRule annotations for server down --- charts/prometheus-prefect-exporter/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/prometheus-prefect-exporter/values.yaml b/charts/prometheus-prefect-exporter/values.yaml index c854566a..a55a7ce8 100644 --- a/charts/prometheus-prefect-exporter/values.yaml +++ b/charts/prometheus-prefect-exporter/values.yaml @@ -87,8 +87,8 @@ prometheusRule: # labels: # severity: critical # annotations: - # summary: Flow Run {{ $labels.flow_name }} {{ $labels.state_name }} (Flow {{ $labels.flow_name }}) - # description: "Flow Run failed or crashed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + # summary: Prefect server is down + # description: The total number of Prefect server pods in the Prefect namespace is zero # - alert: PrefectDeploymentsAllPaused # expr: (count by (namespace) (prefect_info_deployment) == bool count by (namespace) (prefect_info_deployment{is_schedule_active="False"})) == 1 From 6f3b81b528e447e046850966064a170fb038b2f4 Mon Sep 17 00:00:00 2001 From: "Bryan \"Beege\" Berry" Date: Thu, 22 Aug 2024 12:50:44 +0900 Subject: [PATCH 2/2] Update charts/prometheus-prefect-exporter/values.yaml Updated alert description per @mitchnielsen's advice Co-authored-by: Mitchell Nielsen --- charts/prometheus-prefect-exporter/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/prometheus-prefect-exporter/values.yaml b/charts/prometheus-prefect-exporter/values.yaml index a55a7ce8..1cee59ae 100644 --- a/charts/prometheus-prefect-exporter/values.yaml +++ b/charts/prometheus-prefect-exporter/values.yaml @@ -88,7 +88,7 @@ prometheusRule: # severity: critical # annotations: # summary: Prefect server is down - # description: The total number of Prefect server pods in the Prefect namespace is zero + # description: There are no Prefect server pods in the "prefect" namespace # - alert: PrefectDeploymentsAllPaused # expr: (count by (namespace) (prefect_info_deployment) == bool count by (namespace) (prefect_info_deployment{is_schedule_active="False"})) == 1