Skip to content

Commit

Permalink
Add infiniband_switch_uptime_seconds from ibswinfo (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
treydock authored Dec 21, 2023
1 parent 49b34c0 commit 8c9d06f
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 5 deletions.
31 changes: 31 additions & 0 deletions collectors/ibswinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type IbswinfoCollector struct {
logger log.Logger
collector string
HardwareInfo *prometheus.Desc
Uptime *prometheus.Desc
PowerSupplyStatus *prometheus.Desc
PowerSupplyDCPower *prometheus.Desc
PowerSupplyFanStatus *prometheus.Desc
Expand All @@ -58,6 +59,7 @@ type Ibswinfo struct {
SerialNumber string
PSID string
FirmwareVersion string
Uptime float64
PowerSupplies []SwitchPowerSupply
Temp float64
FanStatus string
Expand Down Expand Up @@ -88,6 +90,8 @@ func NewIbswinfoCollector(devices *[]InfinibandDevice, runonce bool, logger log.
collector: collector,
HardwareInfo: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "hardware_info"),
"Infiniband switch hardware info", []string{"guid", "firmware_version", "psid", "part_number", "serial_number", "switch"}, nil),
Uptime: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "uptime_seconds"),
"Infiniband switch uptime in seconds", []string{"guid"}, nil),
PowerSupplyStatus: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "power_supply_status_info"),
"Infiniband switch power supply status", []string{"guid", "psu", "status"}, nil),
PowerSupplyDCPower: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "power_supply_dc_power_status_info"),
Expand All @@ -107,6 +111,7 @@ func NewIbswinfoCollector(devices *[]InfinibandDevice, runonce bool, logger log.

func (s *IbswinfoCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- s.HardwareInfo
ch <- s.Uptime
ch <- s.PowerSupplyStatus
ch <- s.PowerSupplyDCPower
ch <- s.PowerSupplyFanStatus
Expand All @@ -122,6 +127,7 @@ func (s *IbswinfoCollector) Collect(ch chan<- prometheus.Metric) {
for _, swinfo := range swinfos {
ch <- prometheus.MustNewConstMetric(s.HardwareInfo, prometheus.GaugeValue, 1, swinfo.device.GUID,
swinfo.FirmwareVersion, swinfo.PSID, swinfo.PartNumber, swinfo.SerialNumber, swinfo.device.Name)
ch <- prometheus.MustNewConstMetric(s.Uptime, prometheus.GaugeValue, swinfo.Uptime, swinfo.device.GUID)
for _, psu := range swinfo.PowerSupplies {
if psu.Status != "" {
ch <- prometheus.MustNewConstMetric(s.PowerSupplyStatus, prometheus.GaugeValue, 1, swinfo.device.GUID, psu.ID, psu.Status)
Expand Down Expand Up @@ -234,6 +240,7 @@ func parse_ibswinfo(out string, logger log.Logger) (Ibswinfo, error) {
data.Temp = math.NaN()
lines := strings.Split(out, "\n")
psus := make(map[string]SwitchPowerSupply)
var err error
var powerSupplies []SwitchPowerSupply
var fans []SwitchFan
var psuID string
Expand All @@ -260,6 +267,30 @@ func parse_ibswinfo(out string, logger log.Logger) (Ibswinfo, error) {
case "firmware version":
data.FirmwareVersion = value
}
if strings.HasPrefix(key, "uptime") {
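// Uptime is reported as Nd-H:M:S (or just H:M:S when there is no day part): split off the day count, parse the clock part with time.Parse, and convert both to seconds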
var days float64
uptimeHMS := ""
uptime_s1 := strings.Split(value, "-")
if len(uptime_s1) == 2 {
daysStr := strings.Replace(uptime_s1[0], "d", "", 1)
days, err = strconv.ParseFloat(daysStr, 64)
if err != nil {
level.Error(logger).Log("msg", "Unable to parse uptime duration", "err", err, "value", value)
continue
}
uptimeHMS = uptime_s1[1]
} else {
uptimeHMS = value
}
t1, err := time.Parse("15:04:05", uptimeHMS)
if err != nil {
level.Error(logger).Log("msg", "Unable to parse uptime duration", "err", err, "value", value)
continue
}
t2, _ := time.Parse("15:04:05", "00:00:00")
data.Uptime = (days * 86400) + t1.Sub(t2).Seconds()
}
var psu SwitchPowerSupply
psu.PowerW = math.NaN()
matchesPSU := rePSU.FindStringSubmatch(key)
Expand Down
17 changes: 12 additions & 5 deletions collectors/ibswinfo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ func TestParseIBSWInfo(t *testing.T) {
if data.FirmwareVersion != "11.2008.2102" {
t.Errorf("Unexpected firmware version, got %s", data.FirmwareVersion)
}
if data.Uptime != 13862333 {
t.Errorf("Unexpected uptime, got %f", data.Uptime)
}
if len(data.PowerSupplies) != 2 {
t.Errorf("Unexpected number of power supplies, got %d", len(data.PowerSupplies))
}
Expand Down Expand Up @@ -270,19 +273,23 @@ func TestIbswinfoCollector(t *testing.T) {
# TYPE infiniband_switch_temperature_celsius gauge
infiniband_switch_temperature_celsius{guid="0x506b4b03005c2740"} 53
infiniband_switch_temperature_celsius{guid="0x7cfe9003009ce5b0"} 45
# HELP infiniband_switch_uptime_seconds Infiniband switch uptime in seconds
# TYPE infiniband_switch_uptime_seconds gauge
infiniband_switch_uptime_seconds{guid="0x506b4b03005c2740"} 8301347
infiniband_switch_uptime_seconds{guid="0x7cfe9003009ce5b0"} 13862333
`
collector := NewIbswinfoCollector(&switchDevices, false, log.NewNopLogger())
gatherers := setupGatherer(collector)
if val, err := testutil.GatherAndCount(gatherers); err != nil {
t.Errorf("Unexpected error: %v", err)
} else if val != 42 {
t.Errorf("Unexpected collection count %d, expected 42", val)
} else if val != 44 {
t.Errorf("Unexpected collection count %d, expected 44", val)
}
if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected),
"infiniband_switch_power_supply_status_info", "infiniband_switch_power_supply_dc_power_status_info",
"infiniband_switch_power_supply_fan_status_info", "infiniband_switch_power_supply_watts",
"infiniband_switch_temperature_celsius", "infiniband_switch_fan_status_info", "infiniband_switch_fan_rpm",
"infiniband_switch_hardware_info",
"infiniband_switch_hardware_info", "infiniband_switch_uptime_seconds",
"infiniband_exporter_collect_errors", "infiniband_exporter_collect_timeouts"); err != nil {
t.Errorf("unexpected collecting result:\n%s", err)
}
Expand All @@ -307,8 +314,8 @@ func TestIbswinfoCollectorMissingStatus(t *testing.T) {
gatherers := setupGatherer(collector)
if val, err := testutil.GatherAndCount(gatherers); err != nil {
t.Errorf("Unexpected error: %v", err)
} else if val != 35 {
t.Errorf("Unexpected collection count %d, expected 42", val)
} else if val != 37 {
t.Errorf("Unexpected collection count %d, expected 37", val)
}
}

Expand Down

0 comments on commit 8c9d06f

Please sign in to comment.