Skip to content

Commit

Permalink
Learn about PCI devices of a host
Browse files Browse the repository at this point in the history
This commit adds the ability for HostNexus to learn about PCI devices.
For now, we only consider NVIDIA devices for which IOMMU is enabled.
  • Loading branch information
bsatzger committed May 7, 2024
1 parent cdc73e1 commit 37200b6
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 0 deletions.
50 changes: 50 additions & 0 deletions prog/learn_pci.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# frozen_string_literal: true

# Discovers PCI devices on a VM host by running lspci over SSH and upserting
# one PciDevice row per device found.
#
# The lspci query is restricted with "-d 10de::" (vendor ID 10de), and only
# devices that report an IOMMUGroup are recorded.
class Prog::LearnPci < Prog::Base
  subject_is :sshable, :vm_host

  # Keys every lspci record must carry; a record missing any of them is
  # treated as a parse failure rather than silently skipped.
  REQUIRED_KEYS = ["Slot", "Class", "Vendor", "Device"].freeze

  # Plain value holder for one parsed lspci record. All members are the raw
  # strings taken from the lspci output (numa_node is nil when the record
  # has no NUMANode line).
  PciDeviceRecord = Struct.new(:slot, :device_class, :vendor, :device, :numa_node, :iommu_group) do
    # Parses the output of "lspci -vnmm" into a frozen array of records.
    #
    # lspci_str - String with blank-line separated records, each made of
    #             "Key:<TAB>Value" lines.
    #
    # Returns a frozen Array of PciDeviceRecord; records without an
    # IOMMUGroup key are left out.
    # Raises RuntimeError ("BUG: lspci parse failed") when a line is not a
    # "Key:<TAB>Value" pair or a record lacks one of REQUIRED_KEYS.
    def self.parse_all(lspci_str)
      records = lspci_str.strip.split(/^\n+/).filter_map do |dev_str|
        pairs = dev_str.split("\n").map do |line|
          # Limit of 2 keeps an empty value ("Key:\t" -> ["Key", ""]) and a
          # value that itself contains ":\t" intact, so to_h below always
          # receives two-element pairs.
          pair = line.split(":\t", 2)
          fail "BUG: lspci parse failed" unless pair.length == 2
          pair
        end
        dev_h = pairs.to_h

        fail "BUG: lspci parse failed" unless REQUIRED_KEYS.all? { |key| dev_h.key?(key) }

        # Devices that do not report an IOMMU group are ignored.
        next unless dev_h.key?("IOMMUGroup")

        new(dev_h["Slot"], dev_h["Class"], dev_h["Vendor"], dev_h["Device"], dev_h["NUMANode"], dev_h["IOMMUGroup"])
      end
      records.freeze
    end
  end

  # Runs lspci on the host and builds one unsaved PciDevice per parsed
  # record, attached to the current vm_host.
  #
  # Returns an Array of PciDevice instances (empty when no record qualifies).
  def make_model_instances
    PciDeviceRecord.parse_all(sshable.cmd("/usr/bin/lspci -vnmm -d 10de::")).map do |rec|
      PciDevice.new_with_id(
        vm_host_id: vm_host.id,
        slot: rec.slot,
        device_class: rec.device_class,
        vendor: rec.vendor,
        device: rec.device,
        numa_node: rec.numa_node,
        iommu_group: rec.iommu_group
      )
    end
  end

  # Upserts every discovered device keyed on (vm_host_id, slot): a new slot
  # is inserted, an already known slot has its remaining columns overwritten
  # with the freshly learned values. Pops with "created PciDevice records".
  label def start
    make_model_instances.each do |pci|
      # The conflict is resolved by the insert_conflict clause below, so the
      # model-level :unique auto validation is skipped for this save.
      pci.skip_auto_validations(:unique) do
        pci.insert_conflict(target: [:vm_host_id, :slot],
          update: {
            device_class: Sequel[:excluded][:device_class],
            vendor: Sequel[:excluded][:vendor],
            device: Sequel[:excluded][:device],
            numa_node: Sequel[:excluded][:numa_node],
            iommu_group: Sequel[:excluded][:iommu_group]
          }).save_changes
      end
    end

    pop("created PciDevice records")
  end
end
1 change: 1 addition & 0 deletions prog/vm/host_nexus.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def before_run
bud Prog::LearnArch
bud Prog::LearnCores
bud Prog::LearnStorage
bud Prog::LearnPci
bud Prog::InstallDnsmasq
bud Prog::SetupSysstat
bud Prog::SetupNftables
Expand Down
111 changes: 111 additions & 0 deletions spec/prog/learn_pci_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# frozen_string_literal: true

require_relative "../model/spec_helper"

# Specs for Prog::LearnPci. Every example stubs sshable.cmd for the exact
# lspci command line and then checks either the model instances built by
# make_model_instances or the PciDevice rows present after start.
RSpec.describe Prog::LearnPci do
describe "#make_model_instances" do
# NOTE(review): this let has an empty body and no example in this describe
# block references nvidia_gpu_with_audio -- confirm whether it can be removed.
let(:nvidia_gpu_with_audio) do
end

# The stubbed output holds two device records (slots 01:00.0 and 01:00.1),
# both with IOMMUGroup 13. Only 01:00.0 has a NUMANode line, so 01:00.1 is
# expected to be stored with numa_node nil.
# NOTE(review): the parser splits each line on ":\t", so the fixture lines
# need a tab after the colon and the records need a blank line between
# them -- confirm the heredoc whitespace is intact.
it "exits, saving model instances" do
vmh = Prog::Vm::HostNexus.assemble("::1").subject
lp = described_class.new(Strand.new(stack: [{"subject_id" => vmh.id}]))
expect(lp.sshable).to receive(:cmd).with("/usr/bin/lspci -vnmm -d 10de::").and_return(<<EOS)
Slot: 01:00.0
Class: 0300
Vendor: 10de
Device: 27b0
SVendor: 10de
SDevice: 16fa
Rev: a1
NUMANode: 1
IOMMUGroup: 13
Slot: 01:00.1
Class: 0403
Vendor: 10de
Device: 22bc
SVendor: 10de
SDevice: 16fa
Rev: a1
IOMMUGroup: 13
EOS
# Rows are projected to plain hashes and sorted by slot so the comparison
# does not depend on row ids or retrieval order.
expect { lp.start }.to exit({"msg" => "created PciDevice records"}).and change {
PciDevice.map { {vm_host_id: _1.vm_host_id, slot: _1.slot, device_class: _1.device_class, vendor: _1.vendor, device: _1.device, numa_node: _1.numa_node, iommu_group: _1.iommu_group, vm_id: _1.vm_id} }.sort_by { _1[:slot] }
}.from(
[]
).to(
[
{vm_host_id: vmh.id, slot: "01:00.0", device_class: "0300", vendor: "10de", device: "27b0", numa_node: 1, iommu_group: 13, vm_id: nil},
{vm_host_id: vmh.id, slot: "01:00.1", device_class: "0403", vendor: "10de", device: "22bc", numa_node: nil, iommu_group: 13, vm_id: nil}
]
)
end

# A row for slot 01:00.0 is created up front with placeholder values. After
# start, that row is expected to carry the values from the lspci output
# (numa_node becomes nil because the output has no NUMANode line) and a
# second row for slot 01:00.1 is expected to exist.
it "exits, updating existing model instances" do
vmh = Prog::Vm::HostNexus.assemble("::1").subject
lp = described_class.new(Strand.new(stack: [{"subject_id" => vmh.id}]))
PciDevice.create_with_id(vm_host_id: vmh.id, slot: "01:00.0", device_class: "dc", vendor: "vd", device: "dv", numa_node: 0, iommu_group: 3)
expect(lp.sshable).to receive(:cmd).with("/usr/bin/lspci -vnmm -d 10de::").and_return(<<EOS)
Slot: 01:00.0
Class: 0300
Vendor: 10de
Device: 27b0
SVendor: 10de
SDevice: 16fa
Rev: a1
IOMMUGroup: 13
Slot: 01:00.1
Class: 0403
Vendor: 10de
Device: 22bc
SVendor: 10de
SDevice: 16fa
Rev: a1
IOMMUGroup: 13
EOS
expect { lp.start }.to exit({"msg" => "created PciDevice records"}).and change {
PciDevice.map { {vm_host_id: _1.vm_host_id, slot: _1.slot, device_class: _1.device_class, vendor: _1.vendor, device: _1.device, numa_node: _1.numa_node, iommu_group: _1.iommu_group, vm_id: _1.vm_id} }.sort_by { _1[:slot] }
}.from(
[{vm_host_id: vmh.id, slot: "01:00.0", device_class: "dc", vendor: "vd", device: "dv", numa_node: 0, iommu_group: 3, vm_id: nil}]
).to(
[
{vm_host_id: vmh.id, slot: "01:00.0", device_class: "0300", vendor: "10de", device: "27b0", numa_node: nil, iommu_group: 13, vm_id: nil},
{vm_host_id: vmh.id, slot: "01:00.1", device_class: "0403", vendor: "10de", device: "22bc", numa_node: nil, iommu_group: 13, vm_id: nil}
]
)
end

# The single record has no IOMMUGroup line, so make_model_instances is
# expected to return an empty array.
it "ignores devices without iommu group" do
vmh = Prog::Vm::HostNexus.assemble("::1").subject
lp = described_class.new(Strand.new(stack: [{"subject_id" => vmh.id}]))
expect(lp.sshable).to receive(:cmd).with("/usr/bin/lspci -vnmm -d 10de::").and_return(<<EOS)
Slot: 01:00.0
Class: 0300
Vendor: 10de
Device: 27b0
SVendor: 10de
SDevice: 16fa
Rev: a1
EOS
expect(lp.make_model_instances).to eq([])
end

# The record has no Vendor line, so parsing is expected to fail with the
# "BUG: lspci parse failed" RuntimeError.
it "can raise a data parse error" do
vmh = Prog::Vm::HostNexus.assemble("::1").subject
lp = described_class.new(Strand.new(stack: [{"subject_id" => vmh.id}]))
expect(lp.sshable).to receive(:cmd).with("/usr/bin/lspci -vnmm -d 10de::").and_return(<<EOS)
Slot: 01:00.0
Class: 0300
Device: 27b0
SVendor: 10de
SDevice: 16fa
Rev: a1
IOMMUGroup: 13
EOS

expect { lp.make_model_instances }.to raise_error RuntimeError, "BUG: lspci parse failed"
end
end
end
1 change: 1 addition & 0 deletions spec/prog/vm/host_nexus_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
Prog::LearnArch,
Prog::LearnCores,
Prog::LearnStorage,
Prog::LearnPci,
Prog::InstallDnsmasq,
Prog::SetupSysstat,
Prog::SetupNftables
Expand Down

0 comments on commit 37200b6

Please sign in to comment.