Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce PCI devices #1509

Merged
merged 3 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions migrate/20240507_pci_device.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# frozen_string_literal: true

# Creates the pci_device table: one row per PCI device discovered on a VM
# host, optionally linked to the VM the device is assigned to.
Sequel.migration do
  change do
    create_table(:pci_device) do
      column :id, :uuid, primary_key: true
      # Values as reported by lspci (slot address plus class/vendor/device codes).
      column :slot, :text, null: false
      column :device_class, :text, null: false
      column :vendor, :text, null: false
      column :device, :text, null: false
      # The NUMA node is optional; the IOMMU group is mandatory.
      column :numa_node, :Integer, null: true
      column :iommu_group, :Integer, null: false
      column :enabled, :bool, null: false, default: true
      # Every device belongs to a host; vm_id stays NULL until a VM is linked.
      foreign_key :vm_host_id, :vm_host, type: :uuid, null: false
      foreign_key :vm_id, :vm, type: :uuid, null: true
      # A slot identifies at most one device per host.
      unique [:vm_host_id, :slot]
      index [:vm_id]
    end
  end
end
18 changes: 18 additions & 0 deletions model/pci_device.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

require_relative "../model"

# Sequel model for a row of the pci_device table: a PCI device that lives on
# a VmHost and may be associated with a Vm.
class PciDevice < Sequel::Model
  include ResourceMethods

  many_to_one :vm_host
  many_to_one :vm

  # PCI class codes that #is_gpu treats as GPUs. Kept as a frozen constant so
  # the list is not re-allocated on every call.
  GPU_DEVICE_CLASSES = ["0300", "0302"].freeze

  # UBID type used when generating identifiers for this model.
  def self.ubid_type
    UBID::TYPE_ETC
  end

  # Returns true when device_class is one of GPU_DEVICE_CLASSES, false
  # otherwise (including when device_class is nil).
  def is_gpu
    GPU_DEVICE_CLASSES.include?(device_class)
  end
end
1 change: 1 addition & 0 deletions model/vm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class Vm < Sequel::Model
one_to_one :assigned_vm_address, key: :dst_vm_id, class: :AssignedVmAddress
one_to_many :vm_storage_volumes, key: :vm_id, order: Sequel.desc(:boot)
one_to_one :active_billing_record, class: :BillingRecord, key: :resource_id do |ds| ds.active end
one_to_many :pci_devices, key: :vm_id, class: :PciDevice

plugin :association_dependencies, sshable: :destroy, assigned_vm_address: :destroy, vm_storage_volumes: :destroy

Expand Down
1 change: 1 addition & 0 deletions model/vm_host.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class VmHost < Sequel::Model
one_to_many :assigned_host_addresses, key: :host_id, class: :AssignedHostAddress
one_to_many :spdk_installations, key: :vm_host_id
one_to_many :storage_devices, key: :vm_host_id
one_to_many :pci_devices, key: :vm_host_id

plugin :association_dependencies, assigned_host_addresses: :destroy, assigned_subnets: :destroy, hetzner_host: :destroy, spdk_installations: :destroy, storage_devices: :destroy

Expand Down
50 changes: 50 additions & 0 deletions prog/learn_pci.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# frozen_string_literal: true

# Discovers PCI devices on a VM host by running lspci over SSH and upserts a
# PciDevice row for each reported device that has an IOMMU group.
class Prog::LearnPci < Prog::Base
  subject_is :sshable, :vm_host

  # Keys every lspci record must contain; a record missing any is a parse bug.
  REQUIRED_KEYS = ["Slot", "Class", "Vendor", "Device"].freeze

  # Plain value object holding the fields extracted from one lspci record.
  # Field values are the raw strings from lspci (nil when the key is absent).
  PciDeviceRecord = Struct.new(:slot, :device_class, :vendor, :device, :numa_node, :iommu_group) do
    # Parses the output of `lspci -vnmm`: records are separated by blank
    # lines, and each line of a record is "Key:<TAB>Value".
    #
    # Returns a frozen array of PciDeviceRecord. Records without an
    # "IOMMUGroup" key are skipped. Raises RuntimeError when a record lacks
    # one of REQUIRED_KEYS.
    def self.parse_all(lspci_str)
      lspci_str.strip.split(/^\n+/).filter_map do |dev_str|
        dev_h = dev_str.split("\n").map { |line| line.split(":\t") }.to_h
        fail "BUG: lspci parse failed" unless REQUIRED_KEYS.all? { |key| dev_h.key?(key) }
        # The required-key check runs first, so a malformed record raises
        # even when it would have been skipped for lacking an IOMMU group.
        next unless dev_h.key?("IOMMUGroup")

        new(*dev_h.values_at("Slot", "Class", "Vendor", "Device", "NUMANode", "IOMMUGroup"))
      end.freeze
    end
  end

  # Runs lspci on the host (restricted by `-d 10de::` to vendor ID 10de) and
  # returns one unsaved PciDevice per parsed record, bound to this vm_host.
  def make_model_instances
    PciDeviceRecord.parse_all(sshable.cmd("/usr/bin/lspci -vnmm -d 10de::")).map do |rec|
      PciDevice.new_with_id(
        vm_host_id: vm_host.id,
        slot: rec.slot,
        device_class: rec.device_class,
        vendor: rec.vendor,
        device: rec.device,
        numa_node: rec.numa_node,
        iommu_group: rec.iommu_group
      )
    end
  end

  label def start
    make_model_instances.each do |pci|
      # The unique auto-validation is skipped so that a row which already
      # exists for (vm_host_id, slot) reaches the ON CONFLICT clause and is
      # updated in place rather than rejected before the insert.
      pci.skip_auto_validations(:unique) do
        pci.insert_conflict(target: [:vm_host_id, :slot],
          update: {
            device_class: Sequel[:excluded][:device_class],
            vendor: Sequel[:excluded][:vendor],
            device: Sequel[:excluded][:device],
            numa_node: Sequel[:excluded][:numa_node],
            iommu_group: Sequel[:excluded][:iommu_group]
          }).save_changes
      end
    end

    pop("created PciDevice records")
  end
end
1 change: 1 addition & 0 deletions prog/vm/host_nexus.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def before_run
bud Prog::LearnArch
bud Prog::LearnCores
bud Prog::LearnStorage
bud Prog::LearnPci
bud Prog::InstallDnsmasq
bud Prog::SetupSysstat
bud Prog::SetupNftables
Expand Down
20 changes: 20 additions & 0 deletions spec/model/pci_device_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# frozen_string_literal: true

require_relative "spec_helper"

RSpec.describe PciDevice do
  # Builds an unsaved PciDevice carrying only the given device_class and
  # returns what #is_gpu reports for it.
  def gpu_flag_for(device_class)
    described_class.new(device_class: device_class).is_gpu
  end

  it "returns correctly that a device with class 300 is a gpu" do
    expect(gpu_flag_for("0300")).to be_truthy
  end

  it "returns correctly that a device with class 302 is a gpu" do
    expect(gpu_flag_for("0302")).to be_truthy
  end

  it "returns correctly that a device is not a gpu" do
    expect(gpu_flag_for("0403")).to be_falsy
  end
end
111 changes: 111 additions & 0 deletions spec/prog/learn_pci_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# frozen_string_literal: true

require_relative "../model/spec_helper"

RSpec.describe Prog::LearnPci do
  describe "#make_model_instances" do
    let(:vmh) { Prog::Vm::HostNexus.assemble("::1").subject }
    let(:lp) { described_class.new(Strand.new(stack: [{"subject_id" => vmh.id}])) }
    let(:lspci_cmd) { "/usr/bin/lspci -vnmm -d 10de::" }

    # Snapshot of every PciDevice row as a plain hash, ordered by slot, so
    # the examples can compare database state before and after the prog runs.
    def pci_device_rows
      PciDevice.map { |d|
        {vm_host_id: d.vm_host_id, slot: d.slot, device_class: d.device_class, vendor: d.vendor, device: d.device, numa_node: d.numa_node, iommu_group: d.iommu_group, vm_id: d.vm_id}
      }.sort_by { |row| row[:slot] }
    end

    # NOTE: the lspci fixtures spell the key/value separator as an explicit
    # "\t" escape. The parser splits on ":\t", so a literal tab silently
    # converted to spaces by an editor would break every example.

    it "exits, saving model instances" do
      expect(lp.sshable).to receive(:cmd).with(lspci_cmd).and_return(<<~EOS)
        Slot:\t01:00.0
        Class:\t0300
        Vendor:\t10de
        Device:\t27b0
        SVendor:\t10de
        SDevice:\t16fa
        Rev:\ta1
        NUMANode:\t1
        IOMMUGroup:\t13

        Slot:\t01:00.1
        Class:\t0403
        Vendor:\t10de
        Device:\t22bc
        SVendor:\t10de
        SDevice:\t16fa
        Rev:\ta1
        IOMMUGroup:\t13
      EOS
      expect { lp.start }.to exit({"msg" => "created PciDevice records"}).and change {
        pci_device_rows
      }.from(
        []
      ).to(
        [
          {vm_host_id: vmh.id, slot: "01:00.0", device_class: "0300", vendor: "10de", device: "27b0", numa_node: 1, iommu_group: 13, vm_id: nil},
          {vm_host_id: vmh.id, slot: "01:00.1", device_class: "0403", vendor: "10de", device: "22bc", numa_node: nil, iommu_group: 13, vm_id: nil}
        ]
      )
    end

    it "exits, updating existing model instances" do
      PciDevice.create_with_id(vm_host_id: vmh.id, slot: "01:00.0", device_class: "dc", vendor: "vd", device: "dv", numa_node: 0, iommu_group: 3)
      expect(lp.sshable).to receive(:cmd).with(lspci_cmd).and_return(<<~EOS)
        Slot:\t01:00.0
        Class:\t0300
        Vendor:\t10de
        Device:\t27b0
        SVendor:\t10de
        SDevice:\t16fa
        Rev:\ta1
        IOMMUGroup:\t13

        Slot:\t01:00.1
        Class:\t0403
        Vendor:\t10de
        Device:\t22bc
        SVendor:\t10de
        SDevice:\t16fa
        Rev:\ta1
        IOMMUGroup:\t13
      EOS
      expect { lp.start }.to exit({"msg" => "created PciDevice records"}).and change {
        pci_device_rows
      }.from(
        [{vm_host_id: vmh.id, slot: "01:00.0", device_class: "dc", vendor: "vd", device: "dv", numa_node: 0, iommu_group: 3, vm_id: nil}]
      ).to(
        [
          {vm_host_id: vmh.id, slot: "01:00.0", device_class: "0300", vendor: "10de", device: "27b0", numa_node: nil, iommu_group: 13, vm_id: nil},
          {vm_host_id: vmh.id, slot: "01:00.1", device_class: "0403", vendor: "10de", device: "22bc", numa_node: nil, iommu_group: 13, vm_id: nil}
        ]
      )
    end

    it "ignores devices without iommu group" do
      expect(lp.sshable).to receive(:cmd).with(lspci_cmd).and_return(<<~EOS)
        Slot:\t01:00.0
        Class:\t0300
        Vendor:\t10de
        Device:\t27b0
        SVendor:\t10de
        SDevice:\t16fa
        Rev:\ta1
      EOS
      expect(lp.make_model_instances).to eq([])
    end

    it "can raise a data parse error" do
      # The "Vendor" line is deliberately missing from this record.
      expect(lp.sshable).to receive(:cmd).with(lspci_cmd).and_return(<<~EOS)
        Slot:\t01:00.0
        Class:\t0300
        Device:\t27b0
        SVendor:\t10de
        SDevice:\t16fa
        Rev:\ta1
        IOMMUGroup:\t13
      EOS

      expect { lp.make_model_instances }.to raise_error RuntimeError, "BUG: lspci parse failed"
    end
  end
end
1 change: 1 addition & 0 deletions spec/prog/vm/host_nexus_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
Prog::LearnArch,
Prog::LearnCores,
Prog::LearnStorage,
Prog::LearnPci,
Prog::InstallDnsmasq,
Prog::SetupSysstat,
Prog::SetupNftables
Expand Down
Loading