diff --git a/collectors/mmhealth.go b/collectors/mmhealth.go index 6b4455c..b274e57 100644 --- a/collectors/mmhealth.go +++ b/collectors/mmhealth.go @@ -148,6 +148,7 @@ func mmhealth_parse(out string, logger log.Logger) []HealthMetric { mmhealthIgnoredEntityTypePattern := regexp.MustCompile(*mmhealthIgnoredEntityType) mmhealthIgnoredEventPattern := regexp.MustCompile(*mmhealthIgnoredEvent) var metrics []HealthMetric + var eventKeys []string lines := strings.Split(out, "\n") typeHeaders := make(map[string][]string) for _, line := range lines { @@ -208,6 +209,15 @@ func mmhealth_parse(out string, logger log.Logger) []HealthMetric { level.Debug(logger).Log("msg", "Skipping event due to ignored pattern", "event", metric.Event) continue } + if metric.Type == "Event" { + eventKey := fmt.Sprintf("%s-%s-%s-%s", metric.Component, metric.EntityName, metric.EntityType, metric.Event) + if SliceContains(eventKeys, eventKey) { + level.Debug(logger).Log("msg", "Skipping event as already encountered", "event", metric.Event) + continue + } else { + eventKeys = append(eventKeys, eventKey) + } + } metrics = append(metrics, metric) } return metrics diff --git a/collectors/mmhealth_test.go b/collectors/mmhealth_test.go index ec441d4..9484766 100644 --- a/collectors/mmhealth_test.go +++ b/collectors/mmhealth_test.go @@ -34,6 +34,8 @@ mmhealth:State:HEADER:version:reserved:reserved:node:component:entityname:entity mmhealth:State:0:1:::ib-haswell1.example.com:NODE:ib-haswell1.example.com:NODE:TIPS:2020-01-27 09%3A35%3A21.859186 EST: mmhealth:State:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:TIPS:2020-01-27 09%3A35%3A21.791895 EST: mmhealth:Event:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:gpfs_pagepool_small::2020-01-07 16%3A47%3A43.892296 EST::no: +mmhealth:Event:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:cluster_connections_down:10.22.51.57,1,1:2023-07-05 16%3A33%3A11.224969 EDT:10.22.51.57:no:Connection to cluster node 10.22.51.57 has all 1 connection(s) down. (Maximum 1).:STATE_CHANGE:WARNING: +mmhealth:Event:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:cluster_connections_down:10.22.95.17,1,1:2023-07-05 09%3A56%3A59.071165 EDT:10.22.95.17:no:Connection to cluster node 10.22.95.17 has all 1 connection(s) down. (Maximum 1).:STATE_CHANGE:WARNING: mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:ib-haswell1.example.com:NODE:HEALTHY:2020-01-07 17%3A02%3A40.131272 EST: mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:ib0:NIC:HEALTHY:2020-01-07 16%3A47%3A39.397852 EST: mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:mlx5_0/1:IB_RDMA:FOO:2020-01-07 17%3A02%3A40.205075 EST: @@ -96,8 +98,8 @@ func TestParseMmhealth(t *testing.T) { w := log.NewSyncWriter(os.Stderr) logger := log.NewLogfmtLogger(w) metrics := mmhealth_parse(mmhealthStdout, logger) - if len(metrics) != 10 { - t.Errorf("Expected 10 metrics returned, got %d", len(metrics)) + if len(metrics) != 11 { + t.Errorf("Expected 11 metrics returned, got %d", len(metrics)) return } if val := metrics[0].Component; val != "NODE" { @@ -133,8 +135,8 @@ func TestParseMmhealthIgnores(t *testing.T) { mmhealthIgnoredEntityType = &noignore mmhealthIgnoredEvent = &eventIgnore metrics := mmhealth_parse(mmhealthStdout, log.NewNopLogger()) - if len(metrics) != 5 { - t.Errorf("Expected 5 metrics returned, got %d", len(metrics)) + if len(metrics) != 6 { + t.Errorf("Expected 6 metrics returned, got %d", len(metrics)) return } ignore = "ess" @@ -143,8 +145,8 @@ func TestParseMmhealthIgnores(t *testing.T) { mmhealthIgnoredEntityType = &noignore mmhealthIgnoredEvent = &empty metrics = mmhealth_parse(mmhealthStdout, log.NewNopLogger()) - if len(metrics) != 9 { - t.Errorf("Expected 9 metrics returned, got %d", len(metrics)) + if len(metrics) != 10 { + t.Errorf("Expected 10 metrics returned, got %d", len(metrics)) return } ignore = "FILESYSTEM" @@ -153,8 +155,8 @@ func TestParseMmhealthIgnores(t *testing.T) { mmhealthIgnoredEntityType = &ignore mmhealthIgnoredEvent = &empty metrics = mmhealth_parse(mmhealthStdout, log.NewNopLogger()) - if len(metrics) != 7 { - t.Errorf("Expected 7 metrics returned, got %d", len(metrics)) + if len(metrics) != 8 { + t.Errorf("Expected 8 metrics returned, got %d", len(metrics)) return } } @@ -173,6 +175,7 @@ func TestMmhealthCollector(t *testing.T) { expected := ` # HELP gpfs_health_event GPFS health event # TYPE gpfs_health_event gauge + gpfs_health_event{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",event="cluster_connections_down"} 1 gpfs_health_event{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",event="gpfs_pagepool_small"} 1 # HELP gpfs_health_status GPFS health status # TYPE gpfs_health_status gauge @@ -282,8 +285,8 @@ func TestMmhealthCollector(t *testing.T) { gatherers := setupGatherer(collector) if val, err := testutil.GatherAndCount(gatherers); err != nil { t.Errorf("Unexpected error: %v", err) - } else if val != 103 { - t.Errorf("Unexpected collection count %d, expected 103", val) + } else if val != 104 { + t.Errorf("Unexpected collection count %d, expected 104", val) } if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_health_status", "gpfs_health_event"); err != nil { t.Errorf("unexpected collecting result:\n%s", err)