Skip to content

Commit

Permalink
Avoid errors when multiple events are present (#63)
Browse files: browse the repository at this point in the history
  • Loading branch information
treydock authored Jul 12, 2023
1 parent 0131f77 commit 24e9d69
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 10 deletions.
10 changes: 10 additions & 0 deletions collectors/mmhealth.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -148,6 +148,7 @@ func mmhealth_parse(out string, logger log.Logger) []HealthMetric {
mmhealthIgnoredEntityTypePattern := regexp.MustCompile(*mmhealthIgnoredEntityType)
mmhealthIgnoredEventPattern := regexp.MustCompile(*mmhealthIgnoredEvent)
var metrics []HealthMetric
var eventKeys []string
lines := strings.Split(out, "\n")
typeHeaders := make(map[string][]string)
for _, line := range lines {
Expand Down Expand Up @@ -208,6 +209,15 @@ func mmhealth_parse(out string, logger log.Logger) []HealthMetric {
level.Debug(logger).Log("msg", "Skipping event due to ignored pattern", "event", metric.Event)
continue
}
if metric.Type == "Event" {
eventKey := fmt.Sprintf("%s-%s-%s-%s", metric.Component, metric.EntityName, metric.EntityType, metric.Event)
if SliceContains(eventKeys, eventKey) {
level.Debug(logger).Log("msg", "Skipping event as already encountered", "event", metric.Event)
continue
} else {
eventKeys = append(eventKeys, eventKey)
}
}
metrics = append(metrics, metric)
}
return metrics
Expand Down
23 changes: 13 additions & 10 deletions collectors/mmhealth_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,6 +34,8 @@ mmhealth:State:HEADER:version:reserved:reserved:node:component:entityname:entity
mmhealth:State:0:1:::ib-haswell1.example.com:NODE:ib-haswell1.example.com:NODE:TIPS:2020-01-27 09%3A35%3A21.859186 EST:
mmhealth:State:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:TIPS:2020-01-27 09%3A35%3A21.791895 EST:
mmhealth:Event:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:gpfs_pagepool_small::2020-01-07 16%3A47%3A43.892296 EST::no:
mmhealth:Event:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:cluster_connections_down:10.22.51.57,1,1:2023-07-05 16%3A33%3A11.224969 EDT:10.22.51.57:no:Connection to cluster node 10.22.51.57 has all 1 connection(s) down. (Maximum 1).:STATE_CHANGE:WARNING:
mmhealth:Event:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:cluster_connections_down:10.22.95.17,1,1:2023-07-05 09%3A56%3A59.071165 EDT:10.22.95.17:no:Connection to cluster node 10.22.95.17 has all 1 connection(s) down. (Maximum 1).:STATE_CHANGE:WARNING:
mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:ib-haswell1.example.com:NODE:HEALTHY:2020-01-07 17%3A02%3A40.131272 EST:
mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:ib0:NIC:HEALTHY:2020-01-07 16%3A47%3A39.397852 EST:
mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:mlx5_0/1:IB_RDMA:FOO:2020-01-07 17%3A02%3A40.205075 EST:
Expand Down Expand Up @@ -96,8 +98,8 @@ func TestParseMmhealth(t *testing.T) {
w := log.NewSyncWriter(os.Stderr)
logger := log.NewLogfmtLogger(w)
metrics := mmhealth_parse(mmhealthStdout, logger)
if len(metrics) != 10 {
t.Errorf("Expected 10 metrics returned, got %d", len(metrics))
if len(metrics) != 11 {
t.Errorf("Expected 11 metrics returned, got %d", len(metrics))
return
}
if val := metrics[0].Component; val != "NODE" {
Expand Down Expand Up @@ -133,8 +135,8 @@ func TestParseMmhealthIgnores(t *testing.T) {
mmhealthIgnoredEntityType = &noignore
mmhealthIgnoredEvent = &eventIgnore
metrics := mmhealth_parse(mmhealthStdout, log.NewNopLogger())
if len(metrics) != 5 {
t.Errorf("Expected 5 metrics returned, got %d", len(metrics))
if len(metrics) != 6 {
t.Errorf("Expected 6 metrics returned, got %d", len(metrics))
return
}
ignore = "ess"
Expand All @@ -143,8 +145,8 @@ func TestParseMmhealthIgnores(t *testing.T) {
mmhealthIgnoredEntityType = &noignore
mmhealthIgnoredEvent = &empty
metrics = mmhealth_parse(mmhealthStdout, log.NewNopLogger())
if len(metrics) != 9 {
t.Errorf("Expected 9 metrics returned, got %d", len(metrics))
if len(metrics) != 10 {
t.Errorf("Expected 10 metrics returned, got %d", len(metrics))
return
}
ignore = "FILESYSTEM"
Expand All @@ -153,8 +155,8 @@ func TestParseMmhealthIgnores(t *testing.T) {
mmhealthIgnoredEntityType = &ignore
mmhealthIgnoredEvent = &empty
metrics = mmhealth_parse(mmhealthStdout, log.NewNopLogger())
if len(metrics) != 7 {
t.Errorf("Expected 7 metrics returned, got %d", len(metrics))
if len(metrics) != 8 {
t.Errorf("Expected 8 metrics returned, got %d", len(metrics))
return
}
}
Expand All @@ -173,6 +175,7 @@ func TestMmhealthCollector(t *testing.T) {
expected := `
# HELP gpfs_health_event GPFS health event
# TYPE gpfs_health_event gauge
gpfs_health_event{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",event="cluster_connections_down"} 1
gpfs_health_event{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",event="gpfs_pagepool_small"} 1
# HELP gpfs_health_status GPFS health status
# TYPE gpfs_health_status gauge
Expand Down Expand Up @@ -282,8 +285,8 @@ func TestMmhealthCollector(t *testing.T) {
gatherers := setupGatherer(collector)
if val, err := testutil.GatherAndCount(gatherers); err != nil {
t.Errorf("Unexpected error: %v", err)
} else if val != 103 {
t.Errorf("Unexpected collection count %d, expected 103", val)
} else if val != 104 {
t.Errorf("Unexpected collection count %d, expected 104", val)
}
if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_health_status", "gpfs_health_event"); err != nil {
t.Errorf("unexpected collecting result:\n%s", err)
Expand Down

0 comments on commit 24e9d69

Please sign in to comment.