From a0a583ef800e7ef862b1906322ac90a00b2e3aa4 Mon Sep 17 00:00:00 2001 From: Trey Dockendorf Date: Thu, 19 Nov 2020 12:10:53 -0500 Subject: [PATCH] Enumerate all possible states for mmces and statuses for mmhealth --- CHANGELOG.md | 3 +- collectors/mmces.go | 15 +++- collectors/mmces_test.go | 143 ++++++++++++++++++++++++++++++++++-- collectors/mmhealth.go | 18 ++++- collectors/mmhealth_test.go | 98 +++++++++++++++++++++++- 5 files changed, 262 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fd9ee3..7b336d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,8 @@ * gpfs_fs_metadata_free_percent * gpfs_fs_free_percent * Remove nodename label from gpfs_perf_* metrics, replace with gpfs_perf_info metric -* mmces and mmhealth status metrics will always have value 1, only the `status` label will change +* mmces state metrics will have one metric per possible state, with active state having value 1 +* mmhealth status metrics will have one metric per possible status with active status having value 1 ### Changes diff --git a/collectors/mmces.go b/collectors/mmces.go index decb77e..87a1caf 100644 --- a/collectors/mmces.go +++ b/collectors/mmces.go @@ -32,6 +32,7 @@ var ( configNodeName = kingpin.Flag("collector.mmces.nodename", "CES node name to check, defaults to FQDN").Default("").String() mmcesTimeout = kingpin.Flag("collector.mmces.timeout", "Timeout for mmces execution").Default("5").Int() cesServices = []string{"AUTH", "BLOCK", "NETWORK", "AUTH_OBJ", "NFS", "OBJ", "SMB", "CES"} + cesStates = []string{"DEGRADED", "DEPEND", "DISABLED", "FAILED", "HEALTHY", "STARTING", "STOPPED", "SUSPENDED"} mmcesExec = mmces ) @@ -94,7 +95,19 @@ func (c *MmcesCollector) Collect(ch chan<- prometheus.Metric) { errorMetric = 1 } for _, m := range metrics { - ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, 1, m.Service, m.State) + for _, s := range cesStates { + var value float64 + if s == m.State { + value = 1 + } + ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, value, m.Service, s) + } + var unknown float64 + if !SliceContains(cesStates, m.State) { + unknown = 1 + level.Warn(c.logger).Log("msg", "Unknown state encountered", "state", m.State, "service", m.Service) + } + ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, unknown, m.Service, "UNKNOWN") } ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "mmces") ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "mmces") diff --git a/collectors/mmces_test.go b/collectors/mmces_test.go index 38d6929..512514f 100644 --- a/collectors/mmces_test.go +++ b/collectors/mmces_test.go @@ -29,7 +29,7 @@ import ( var ( mmcesStdout = ` mmcesstate::HEADER:version:reserved:reserved:NODE:AUTH:BLOCK:NETWORK:AUTH_OBJ:NFS:OBJ:SMB:CES: -mmcesstate::0:1:::ib-protocol01.domain:HEALTHY:DISABLED:HEALTHY:DISABLED:HEALTHY:DISABLED:HEALTHY:HEALTHY: +mmcesstate::0:1:::ib-protocol01.domain:HEALTHY:DISABLED:HEALTHY:DISABLED:HEALTHY:DISABLED:FOO:HEALTHY: ` ) @@ -121,24 +121,89 @@ func TestMMcesCollector(t *testing.T) { mmcesExec = func(nodename string, ctx context.Context) (string, error) { return mmcesStdout, nil } + expected := ` # HELP gpfs_ces_state GPFS CES health status # TYPE gpfs_ces_state gauge + gpfs_ces_state{service="AUTH",state="DEGRADED"} 0 + gpfs_ces_state{service="AUTH",state="DEPEND"} 0 + gpfs_ces_state{service="AUTH",state="DISABLED"} 0 + gpfs_ces_state{service="AUTH",state="FAILED"} 0 gpfs_ces_state{service="AUTH",state="HEALTHY"} 1 + gpfs_ces_state{service="AUTH",state="STARTING"} 0 + gpfs_ces_state{service="AUTH",state="STOPPED"} 0 + gpfs_ces_state{service="AUTH",state="SUSPENDED"} 0 + gpfs_ces_state{service="AUTH",state="UNKNOWN"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="DEGRADED"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="DEPEND"} 0 gpfs_ces_state{service="AUTH_OBJ",state="DISABLED"} 1 + gpfs_ces_state{service="AUTH_OBJ",state="FAILED"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="HEALTHY"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="STARTING"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="STOPPED"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="SUSPENDED"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="UNKNOWN"} 0 + gpfs_ces_state{service="BLOCK",state="DEGRADED"} 0 + gpfs_ces_state{service="BLOCK",state="DEPEND"} 0 gpfs_ces_state{service="BLOCK",state="DISABLED"} 1 + gpfs_ces_state{service="BLOCK",state="FAILED"} 0 + gpfs_ces_state{service="BLOCK",state="HEALTHY"} 0 + gpfs_ces_state{service="BLOCK",state="STARTING"} 0 + gpfs_ces_state{service="BLOCK",state="STOPPED"} 0 + gpfs_ces_state{service="BLOCK",state="SUSPENDED"} 0 + gpfs_ces_state{service="BLOCK",state="UNKNOWN"} 0 + gpfs_ces_state{service="CES",state="DEGRADED"} 0 + gpfs_ces_state{service="CES",state="DEPEND"} 0 + gpfs_ces_state{service="CES",state="DISABLED"} 0 + gpfs_ces_state{service="CES",state="FAILED"} 0 gpfs_ces_state{service="CES",state="HEALTHY"} 1 + gpfs_ces_state{service="CES",state="STARTING"} 0 + gpfs_ces_state{service="CES",state="STOPPED"} 0 + gpfs_ces_state{service="CES",state="SUSPENDED"} 0 + gpfs_ces_state{service="CES",state="UNKNOWN"} 0 + gpfs_ces_state{service="NETWORK",state="DEGRADED"} 0 + gpfs_ces_state{service="NETWORK",state="DEPEND"} 0 + gpfs_ces_state{service="NETWORK",state="DISABLED"} 0 + gpfs_ces_state{service="NETWORK",state="FAILED"} 0 gpfs_ces_state{service="NETWORK",state="HEALTHY"} 1 + gpfs_ces_state{service="NETWORK",state="STARTING"} 0 + gpfs_ces_state{service="NETWORK",state="STOPPED"} 0 + gpfs_ces_state{service="NETWORK",state="SUSPENDED"} 0 + gpfs_ces_state{service="NETWORK",state="UNKNOWN"} 0 + gpfs_ces_state{service="NFS",state="DEGRADED"} 0 + gpfs_ces_state{service="NFS",state="DEPEND"} 0 + gpfs_ces_state{service="NFS",state="DISABLED"} 0 + gpfs_ces_state{service="NFS",state="FAILED"} 0 gpfs_ces_state{service="NFS",state="HEALTHY"} 1 + gpfs_ces_state{service="NFS",state="STARTING"} 0 + gpfs_ces_state{service="NFS",state="STOPPED"} 0 + gpfs_ces_state{service="NFS",state="SUSPENDED"} 0 + gpfs_ces_state{service="NFS",state="UNKNOWN"} 0 + gpfs_ces_state{service="OBJ",state="DEGRADED"} 0 + gpfs_ces_state{service="OBJ",state="DEPEND"} 0 gpfs_ces_state{service="OBJ",state="DISABLED"} 1 - gpfs_ces_state{service="SMB",state="HEALTHY"} 1 + gpfs_ces_state{service="OBJ",state="FAILED"} 0 + gpfs_ces_state{service="OBJ",state="HEALTHY"} 0 + gpfs_ces_state{service="OBJ",state="STARTING"} 0 + gpfs_ces_state{service="OBJ",state="STOPPED"} 0 + gpfs_ces_state{service="OBJ",state="SUSPENDED"} 0 + gpfs_ces_state{service="OBJ",state="UNKNOWN"} 0 + gpfs_ces_state{service="SMB",state="DEGRADED"} 0 + gpfs_ces_state{service="SMB",state="DEPEND"} 0 + gpfs_ces_state{service="SMB",state="DISABLED"} 0 + gpfs_ces_state{service="SMB",state="FAILED"} 0 + gpfs_ces_state{service="SMB",state="HEALTHY"} 0 + gpfs_ces_state{service="SMB",state="STARTING"} 0 + gpfs_ces_state{service="SMB",state="STOPPED"} 0 + gpfs_ces_state{service="SMB",state="SUSPENDED"} 0 + gpfs_ces_state{service="SMB",state="UNKNOWN"} 1 ` collector := NewMmcesCollector(log.NewNopLogger()) gatherers := setupGatherer(collector) if val, err := testutil.GatherAndCount(gatherers); err != nil { t.Errorf("Unexpected error: %v", err) - } else if val != 11 { - t.Errorf("Unexpected collection count %d, expected 11", val) + } else if val != 75 { + t.Errorf("Unexpected collection count %d, expected 75", val) } if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_ces_state"); err != nil { t.Errorf("unexpected collecting result:\n%s", err) @@ -155,21 +220,85 @@ func TestMMcesCollectorHostname(t *testing.T) { expected := ` # HELP gpfs_ces_state GPFS CES health status # TYPE gpfs_ces_state gauge + gpfs_ces_state{service="AUTH",state="DEGRADED"} 0 + gpfs_ces_state{service="AUTH",state="DEPEND"} 0 + gpfs_ces_state{service="AUTH",state="DISABLED"} 0 + gpfs_ces_state{service="AUTH",state="FAILED"} 0 gpfs_ces_state{service="AUTH",state="HEALTHY"} 1 + gpfs_ces_state{service="AUTH",state="STARTING"} 0 + gpfs_ces_state{service="AUTH",state="STOPPED"} 0 + gpfs_ces_state{service="AUTH",state="SUSPENDED"} 0 + gpfs_ces_state{service="AUTH",state="UNKNOWN"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="DEGRADED"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="DEPEND"} 0 gpfs_ces_state{service="AUTH_OBJ",state="DISABLED"} 1 + gpfs_ces_state{service="AUTH_OBJ",state="FAILED"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="HEALTHY"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="STARTING"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="STOPPED"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="SUSPENDED"} 0 + gpfs_ces_state{service="AUTH_OBJ",state="UNKNOWN"} 0 + gpfs_ces_state{service="BLOCK",state="DEGRADED"} 0 + gpfs_ces_state{service="BLOCK",state="DEPEND"} 0 gpfs_ces_state{service="BLOCK",state="DISABLED"} 1 + gpfs_ces_state{service="BLOCK",state="FAILED"} 0 + gpfs_ces_state{service="BLOCK",state="HEALTHY"} 0 + gpfs_ces_state{service="BLOCK",state="STARTING"} 0 + gpfs_ces_state{service="BLOCK",state="STOPPED"} 0 + gpfs_ces_state{service="BLOCK",state="SUSPENDED"} 0 + gpfs_ces_state{service="BLOCK",state="UNKNOWN"} 0 + gpfs_ces_state{service="CES",state="DEGRADED"} 0 + gpfs_ces_state{service="CES",state="DEPEND"} 0 + gpfs_ces_state{service="CES",state="DISABLED"} 0 + gpfs_ces_state{service="CES",state="FAILED"} 0 gpfs_ces_state{service="CES",state="HEALTHY"} 1 + gpfs_ces_state{service="CES",state="STARTING"} 0 + gpfs_ces_state{service="CES",state="STOPPED"} 0 + gpfs_ces_state{service="CES",state="SUSPENDED"} 0 + gpfs_ces_state{service="CES",state="UNKNOWN"} 0 + gpfs_ces_state{service="NETWORK",state="DEGRADED"} 0 + gpfs_ces_state{service="NETWORK",state="DEPEND"} 0 + gpfs_ces_state{service="NETWORK",state="DISABLED"} 0 + gpfs_ces_state{service="NETWORK",state="FAILED"} 0 gpfs_ces_state{service="NETWORK",state="HEALTHY"} 1 + gpfs_ces_state{service="NETWORK",state="STARTING"} 0 + gpfs_ces_state{service="NETWORK",state="STOPPED"} 0 + gpfs_ces_state{service="NETWORK",state="SUSPENDED"} 0 + gpfs_ces_state{service="NETWORK",state="UNKNOWN"} 0 + gpfs_ces_state{service="NFS",state="DEGRADED"} 0 + gpfs_ces_state{service="NFS",state="DEPEND"} 0 + gpfs_ces_state{service="NFS",state="DISABLED"} 0 + gpfs_ces_state{service="NFS",state="FAILED"} 0 gpfs_ces_state{service="NFS",state="HEALTHY"} 1 + gpfs_ces_state{service="NFS",state="STARTING"} 0 + gpfs_ces_state{service="NFS",state="STOPPED"} 0 + gpfs_ces_state{service="NFS",state="SUSPENDED"} 0 + gpfs_ces_state{service="NFS",state="UNKNOWN"} 0 + gpfs_ces_state{service="OBJ",state="DEGRADED"} 0 + gpfs_ces_state{service="OBJ",state="DEPEND"} 0 gpfs_ces_state{service="OBJ",state="DISABLED"} 1 - gpfs_ces_state{service="SMB",state="HEALTHY"} 1 + gpfs_ces_state{service="OBJ",state="FAILED"} 0 + gpfs_ces_state{service="OBJ",state="HEALTHY"} 0 + gpfs_ces_state{service="OBJ",state="STARTING"} 0 + gpfs_ces_state{service="OBJ",state="STOPPED"} 0 + gpfs_ces_state{service="OBJ",state="SUSPENDED"} 0 + gpfs_ces_state{service="OBJ",state="UNKNOWN"} 0 + gpfs_ces_state{service="SMB",state="DEGRADED"} 0 + gpfs_ces_state{service="SMB",state="DEPEND"} 0 + gpfs_ces_state{service="SMB",state="DISABLED"} 0 + gpfs_ces_state{service="SMB",state="FAILED"} 0 + gpfs_ces_state{service="SMB",state="HEALTHY"} 0 + gpfs_ces_state{service="SMB",state="STARTING"} 0 + gpfs_ces_state{service="SMB",state="STOPPED"} 0 + gpfs_ces_state{service="SMB",state="SUSPENDED"} 0 + gpfs_ces_state{service="SMB",state="UNKNOWN"} 1 ` collector := NewMmcesCollector(log.NewNopLogger()) gatherers := setupGatherer(collector) if val, err := testutil.GatherAndCount(gatherers); err != nil { t.Errorf("Unexpected error: %v", err) - } else if val != 11 { - t.Errorf("Unexpected collection count %d, expected 11", val) + } else if val != 75 { + t.Errorf("Unexpected collection count %d, expected 75", val) } if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_ces_state"); err != nil { t.Errorf("unexpected collecting result:\n%s", err) diff --git a/collectors/mmhealth.go b/collectors/mmhealth.go index 006d32c..28ac139 100644 --- a/collectors/mmhealth.go +++ b/collectors/mmhealth.go @@ -36,7 +36,8 @@ var ( "entitytype": "EntityType", "status": "Status", } - mmhealthExec = mmhealth + mmhealthStatuses = []string{"CHECKING", "DEGRADED", "DEPEND", "DISABLED", "FAILED", "HEALTHY", "STARTING", "STOPPED", "SUSPENDED", "TIPS"} + mmhealthExec = mmhealth ) type HealthMetric struct { @@ -81,7 +82,20 @@ func (c *MmhealthCollector) Collect(ch chan<- prometheus.Metric) { errorMetric = 1 } for _, m := range metrics { - ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, 1, m.Component, m.EntityName, m.EntityType, m.Status) + for _, s := range mmhealthStatuses { + var value float64 + if s == m.Status { + value = 1 + } + ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, value, m.Component, m.EntityName, m.EntityType, s) + } + var unknown float64 + if !SliceContains(mmhealthStatuses, m.Status) { + unknown = 1 + level.Warn(c.logger).Log("msg", "Unknown status encountered", "status", m.Status, + "component", m.Component, "entityname", m.EntityName, "entitytype", m.EntityType) + } + ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, unknown, m.Component, m.EntityName, m.EntityType, "UNKNOWN") } ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "mmhealth") ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "mmhealth") diff --git a/collectors/mmhealth_test.go b/collectors/mmhealth_test.go index e84d8d7..0c016fe 100644 --- a/collectors/mmhealth_test.go +++ b/collectors/mmhealth_test.go @@ -35,7 +35,7 @@ mmhealth:State:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:T mmhealth:Event:0:1:::ib-haswell1.example.com:GPFS:ib-haswell1.example.com:NODE:gpfs_pagepool_small::2020-01-07 16%3A47%3A43.892296 EST::no: mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:ib-haswell1.example.com:NODE:HEALTHY:2020-01-07 17%3A02%3A40.131272 EST: mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:ib0:NIC:HEALTHY:2020-01-07 16%3A47%3A39.397852 EST: -mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:mlx5_0/1:IB_RDMA:HEALTHY:2020-01-07 17%3A02%3A40.205075 EST: +mmhealth:State:0:1:::ib-haswell1.example.com:NETWORK:mlx5_0/1:IB_RDMA:FOO:2020-01-07 17%3A02%3A40.205075 EST: mmhealth:State:0:1:::ib-haswell1.example.com:FILESYSTEM:ib-haswell1.example.com:NODE:HEALTHY:2020-01-27 09%3A35%3A21.499264 EST: mmhealth:State:0:1:::ib-haswell1.example.com:FILESYSTEM:project:FILESYSTEM:HEALTHY:2020-01-27 09%3A35%3A21.573978 EST: mmhealth:State:0:1:::ib-haswell1.example.com:FILESYSTEM:scratch:FILESYSTEM:HEALTHY:2020-01-27 09%3A35%3A21.657798 EST: @@ -121,22 +121,112 @@ func TestMmhealthCollector(t *testing.T) { expected := ` # HELP gpfs_health_status GPFS health status # TYPE gpfs_health_status gauge + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="CHECKING"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="DEGRADED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="DEPEND"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="DISABLED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="FAILED"} 0 gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="HEALTHY"} 1 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="STARTING"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="STOPPED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="SUSPENDED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="TIPS"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ib-haswell1.example.com",entitytype="NODE",status="UNKNOWN"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="CHECKING"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="DEGRADED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="DEPEND"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="DISABLED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="FAILED"} 0 gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="HEALTHY"} 1 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="STARTING"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="STOPPED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="SUSPENDED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="TIPS"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="project",entitytype="FILESYSTEM",status="UNKNOWN"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="CHECKING"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="DEGRADED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="DEPEND"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="DISABLED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="FAILED"} 0 gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="HEALTHY"} 1 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="STARTING"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="STOPPED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="SUSPENDED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="TIPS"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="scratch",entitytype="FILESYSTEM",status="UNKNOWN"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="CHECKING"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="DEGRADED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="DEPEND"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="DISABLED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="FAILED"} 0 gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="HEALTHY"} 1 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="STARTING"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="STOPPED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="SUSPENDED"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="TIPS"} 0 + gpfs_health_status{component="FILESYSTEM",entityname="ess",entitytype="FILESYSTEM",status="UNKNOWN"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="CHECKING"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="DEGRADED"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="DEPEND"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="DISABLED"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="FAILED"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="HEALTHY"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="STARTING"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="STOPPED"} 0 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="SUSPENDED"} 0 gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="TIPS"} 1 + gpfs_health_status{component="GPFS",entityname="ib-haswell1.example.com",entitytype="NODE",status="UNKNOWN"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="CHECKING"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="DEGRADED"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="DEPEND"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="DISABLED"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="FAILED"} 0 gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="HEALTHY"} 1 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="STARTING"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="STOPPED"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="SUSPENDED"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="TIPS"} 0 + gpfs_health_status{component="NETWORK",entityname="ib-haswell1.example.com",entitytype="NODE",status="UNKNOWN"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="CHECKING"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="DEGRADED"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="DEPEND"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="DISABLED"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="FAILED"} 0 gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="HEALTHY"} 1 - gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="HEALTHY"} 1 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="STARTING"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="STOPPED"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="SUSPENDED"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="TIPS"} 0 + gpfs_health_status{component="NETWORK",entityname="ib0",entitytype="NIC",status="UNKNOWN"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="CHECKING"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="DEGRADED"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="DEPEND"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="DISABLED"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="FAILED"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="HEALTHY"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="STARTING"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="STOPPED"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="SUSPENDED"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="TIPS"} 0 + gpfs_health_status{component="NETWORK",entityname="mlx5_0/1",entitytype="IB_RDMA",status="UNKNOWN"} 1 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="CHECKING"} 0 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="DEGRADED"} 0 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="DEPEND"} 0 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="DISABLED"} 0 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="FAILED"} 0 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="HEALTHY"} 0 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="STARTING"} 0 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="STOPPED"} 0 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="SUSPENDED"} 0 gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="TIPS"} 1 + gpfs_health_status{component="NODE",entityname="ib-haswell1.example.com",entitytype="NODE",status="UNKNOWN"} 0 ` collector := NewMmhealthCollector(log.NewNopLogger()) gatherers := setupGatherer(collector) if val, err := testutil.GatherAndCount(gatherers); err != nil { t.Errorf("Unexpected error: %v", err) - } else if val != 12 { - t.Errorf("Unexpected collection count %d, expected 12", val) + } else if val != 102 { + t.Errorf("Unexpected collection count %d, expected 102", val) } if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_health_status"); err != nil { t.Errorf("unexpected collecting result:\n%s", err)