Skip to content

Commit

Permalink
Fix ibswinfo parsing when a PSU loses power on a switch (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
treydock authored May 21, 2023
1 parent 6266a6e commit bc0f728
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 12 deletions.
40 changes: 40 additions & 0 deletions collectors/fixtures/ibswinfo/test3.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
=================================================
Quantum Mellanox Technologies
=================================================
part number | MQM8790-HS2F
serial number | MT2148T25782
product name | Jaguar Unmng IB 200
revision | AH
ports | 80
PSID | MT_0000000063
GUID | 0x08c0eb0300add20e
firmware version | 27.2010.4102
-------------------------------------------------
uptime (d-h:m:s) | 67d-04:19:16
-------------------------------------------------
PSU0 status | OK
P/N | MTEF-PSF-AC-C
S/N | MT2148T09879
DC power | OK
fan status | OK
power (W) | 287
PSU1 status | OK
P/N | MTEF-PSF-AC-C
S/N | MT2148T09888
DC power | ERROR
fan status | ERROR
-------------------------------------------------
temperature (C) | 47
max temp (C) | 52
-------------------------------------------------
fan status | OK
fan#1 (rpm) | 5959
fan#2 (rpm) | 5293
fan#3 (rpm) | 5854
fan#4 (rpm) | 5251
fan#5 (rpm) | 5906
fan#6 (rpm) | 5335
fan#7 (rpm) | 6013
fan#8 (rpm) | 5379
fan#9 (rpm) | 5854
-------------------------------------------------
25 changes: 13 additions & 12 deletions collectors/ibswinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,13 @@ func parse_ibswinfo(out string, logger log.Logger) (Ibswinfo, error) {
var powerSupplies []SwitchPowerSupply
var fans []SwitchFan
var psuID string
var dividerCount int
rePSU := regexp.MustCompile(`PSU([0-9]) status`)
reFan := regexp.MustCompile(`fan#([0-9]+)`)
for _, line := range lines {
if strings.HasPrefix(line, "-----") {
dividerCount++
}
l := strings.Split(line, "|")
if len(l) != 2 {
continue
Expand All @@ -238,21 +242,21 @@ func parse_ibswinfo(out string, logger log.Logger) (Ibswinfo, error) {
case "firmware version":
data.FirmwareVersion = value
}
matchesPSU := rePSU.FindStringSubmatch(key)
var psu SwitchPowerSupply
if psuID != "" {
if p, ok := psus[psuID]; ok {
psu = p
}
}
matchesPSU := rePSU.FindStringSubmatch(key)
if len(matchesPSU) == 2 {
psuID = matchesPSU[1]
psu.Status = value
}
if psu.Status == "" && psuID != "" && dividerCount < 4 {
if p, ok := psus[psuID]; ok {
psu = p
}
}
if key == "DC power" {
psu.DCPower = value
}
if psuID != "" && key == "fan status" {
if key == "fan status" && dividerCount < 4 {
psu.FanStatus = value
}
if key == "power (W)" {
Expand All @@ -264,12 +268,9 @@ func parse_ibswinfo(out string, logger log.Logger) (Ibswinfo, error) {
return Ibswinfo{}, err
}
}
if psuID != "" {
if psuID != "" && dividerCount < 4 {
psus[psuID] = psu
}
if key == "power (W)" {
psuID = ""
}
if key == "temperature (C)" {
temp, err := strconv.ParseFloat(value, 64)
if err == nil {
Expand All @@ -279,7 +280,7 @@ func parse_ibswinfo(out string, logger log.Logger) (Ibswinfo, error) {
return Ibswinfo{}, err
}
}
if psuID == "" && key == "fan status" {
if key == "fan status" && dividerCount >= 4 {
data.FanStatus = value
}
matchesFan := reFan.FindStringSubmatch(key)
Expand Down
82 changes: 82 additions & 0 deletions collectors/ibswinfo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,88 @@ func TestParseIBSWInfo(t *testing.T) {
}
}

// TestParseIBSWInfoFailedPSU checks that parse_ibswinfo correctly parses the
// ibswinfo "test3" fixture, in which PSU1 reports "ERROR" for both DC power
// and fan status and has no "power (W)" line. It verifies the switch identity
// fields, both power supplies, the temperature, the chassis fan status and the
// individual fan readings.
func TestParseIBSWInfoFailedPSU(t *testing.T) {
	out, err := ReadFixture("ibswinfo", "test3")
	if err != nil {
		// Include the underlying error so a missing or unreadable fixture is
		// diagnosable from the test output alone.
		t.Fatalf("Unable to read fixture: %s", err)
	}
	data, err := parse_ibswinfo(out, log.NewNopLogger())
	if err != nil {
		// Stop here: continuing would only compare against an unusable result
		// and bury the real failure under unrelated mismatches.
		t.Fatalf("Unexpected error: %s", err)
	}
	if data.PartNumber != "MQM8790-HS2F" {
		t.Errorf("Unexpected part number, got %s", data.PartNumber)
	}
	if data.SerialNumber != "MT2148T25782" {
		t.Errorf("Unexpected serial number, got %s", data.SerialNumber)
	}
	if data.PSID != "MT_0000000063" {
		t.Errorf("Unexpected PSID, got %s", data.PSID)
	}
	if data.FirmwareVersion != "27.2010.4102" {
		t.Errorf("Unexpected firmware version, got %s", data.FirmwareVersion)
	}
	if len(data.PowerSupplies) != 2 {
		t.Errorf("Unexpected number of power supplies, got %d", len(data.PowerSupplies))
	}

	// findPSU returns the first power supply with the given ID, or the zero
	// value when none matches (which then fails the field checks below).
	findPSU := func(id string) SwitchPowerSupply {
		for _, psu := range data.PowerSupplies {
			if psu.ID == id {
				return psu
			}
		}
		return SwitchPowerSupply{}
	}

	// PSU0 is healthy and reports a power draw.
	psu0 := findPSU("0")
	if psu0.Status != "OK" {
		t.Errorf("Unexpected power supply status, got %s", psu0.Status)
	}
	if psu0.DCPower != "OK" {
		t.Errorf("Unexpected power supply dc power status, got %s", psu0.DCPower)
	}
	if psu0.FanStatus != "OK" {
		t.Errorf("Unexpected power supply fan status, got %s", psu0.FanStatus)
	}
	if psu0.PowerW != 287 {
		t.Errorf("Unexpected power supply watts, got %f", psu0.PowerW)
	}

	// PSU1 has lost power: status is still "OK" in the fixture, but DC power
	// and fan status are "ERROR" and no wattage line is present.
	psu1 := findPSU("1")
	if psu1.Status != "OK" {
		t.Errorf("Unexpected power supply status, got %s", psu1.Status)
	}
	if psu1.DCPower != "ERROR" {
		t.Errorf("Unexpected power supply dc power status, got %s", psu1.DCPower)
	}
	if psu1.FanStatus != "ERROR" {
		t.Errorf("Unexpected power supply fan status, got %s", psu1.FanStatus)
	}
	if psu1.PowerW != 0 {
		t.Errorf("Unexpected power supply watts, got %f", psu1.PowerW)
	}

	if data.Temp != 47 {
		t.Errorf("Unexpected temp, got %f", data.Temp)
	}
	// The chassis-level fan status must not be overwritten by PSU1's
	// "ERROR" fan status.
	if data.FanStatus != "OK" {
		t.Errorf("Unexpected fan status, got %s", data.FanStatus)
	}
	if len(data.Fans) != 9 {
		t.Errorf("Unexpected number of fans, got %d", len(data.Fans))
	}
	var fan1 SwitchFan
	for _, fan := range data.Fans {
		if fan.ID == "1" {
			fan1 = fan
			break
		}
	}
	if fan1.RPM != 5959 {
		t.Errorf("Unexpected fan RPM, got %f", fan1.RPM)
	}
}

func TestParseIBSWInfoErrors(t *testing.T) {
tests := []string{
"test-err1",
Expand Down

0 comments on commit bc0f728

Please sign in to comment.