diff --git a/doc/lua-doxygen.md b/doc/lua-doxygen.md index fbddcd57b..7d9e5845c 100644 --- a/doc/lua-doxygen.md +++ b/doc/lua-doxygen.md @@ -3723,6 +3723,10 @@ The option 'n' takes an argument, specified by the ':'. If found the option argu \a pciDom PCI domain identifier of the device + + \a pciFunc + PCI function identifier of the device + \a maxBlockRegs Maximum number of 32-bit registers available to a thread block diff --git a/src/applications/likwid-topology.lua b/src/applications/likwid-topology.lua index 0be247897..436a80870 100644 --- a/src/applications/likwid-topology.lua +++ b/src/applications/likwid-topology.lua @@ -329,6 +329,7 @@ if likwid.nvSupported() then table.insert(output_csv, string.format("PCI bus:\t\t0x%x", gpu["pciBus"])) table.insert(output_csv, string.format("PCI domain:\t\t0x%x", gpu["pciDom"])) table.insert(output_csv, string.format("PCI device:\t\t0x%x", gpu["pciDev"])) + table.insert(output_csv, string.format("PCI function:\t\t0x%x", gpu["pciFunc"])) end table.insert(output_csv, likwid.hline) end diff --git a/src/includes/likwid.h b/src/includes/likwid.h index 0ac8f933a..66d0d5780 100644 --- a/src/includes/likwid.h +++ b/src/includes/likwid.h @@ -2195,6 +2195,7 @@ typedef struct { int pciDev; /*!< \brief PCI device (also known as slot) identifier of the device */ int pciDom; /*!< \brief PCI domain identifier of the device */ + int pciFunc; /*!< \brief PCI function identifier of the device */ int maxBlockRegs; /*!< \brief Maximum number of 32-bit registers available to a thread block */ int numMultiProcs; /*!< \brief Number of multiprocessors on the device */ diff --git a/src/likwid_device.c b/src/likwid_device.c index 291db3918..ccd038792 100644 --- a/src/likwid_device.c +++ b/src/likwid_device.c @@ -551,10 +551,11 @@ int likwid_device_get_available(LikwidDeviceType type, char ***id_list) { #ifdef LIKWID_WITH_NVMON case DEVICE_TYPE_NVIDIA_GPU: - snprintf(id_str, sizeof(id_str), "GN:%08x:%02x:%02x.0", + snprintf(id_str, sizeof(id_str), "GN:%08x:%02x:%02x.%01x", cuda_topo->devices[i].pciDom, cuda_topo->devices[i].pciBus, - cuda_topo->devices[i].pciDev); + cuda_topo->devices[i].pciDev, + cuda_topo->devices[i].pciFunc); break; #endif #ifdef LIKWID_WITH_ROCMON diff --git a/src/luawid.c b/src/luawid.c index 993d653d8..4ce044ec3 100644 --- a/src/luawid.c +++ b/src/luawid.c @@ -2625,6 +2625,9 @@ static int lua_likwid_getCudaTopology(lua_State *L) { lua_pushstring(L, "pciDom"); lua_pushinteger(L, (lua_Integer)(gpu->pciDom)); lua_settable(L, -3); + lua_pushstring(L, "pciFunc"); + lua_pushinteger(L, (lua_Integer)(gpu->pciFunc)); + lua_settable(L, -3); lua_pushstring(L, "maxBlockRegs"); lua_pushinteger(L, (lua_Integer)(gpu->maxBlockRegs)); lua_settable(L, -3); diff --git a/src/topology_cuda.c b/src/topology_cuda.c index 32091c255..2d4f5ea99 100644 --- a/src/topology_cuda.c +++ b/src/topology_cuda.c @@ -304,6 +304,8 @@ topology_cuda_init() CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].pciBus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, dev), ret = -ENOMEM; goto topology_gpu_init_error;); CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].pciDev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, dev), ret = -ENOMEM; goto topology_gpu_init_error;); CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].pciDom, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, dev), ret = -ENOMEM; goto topology_gpu_init_error;); + // TODO: Get PCI function through nvmlDeviceGetPciInfo_v3, nvmlPciInfo_t->function + cudaTopology.devices[i].pciFunc = 0; CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].maxBlockRegs, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, dev), ret = -ENOMEM; goto topology_gpu_init_error;); CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].numMultiProcs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev), ret = -ENOMEM; goto topology_gpu_init_error;); CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].maxThreadPerMultiProc, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, dev), ret = -ENOMEM; goto topology_gpu_init_error;); diff --git a/test/test-topology-gpu-rocm.c b/test/test-topology-gpu-rocm.c index 4b58687bf..a2784dbbb 100644 --- a/test/test-topology-gpu-rocm.c +++ b/test/test-topology-gpu-rocm.c @@ -45,6 +45,7 @@ int main() printf("pciBus: %d\n", device->pciBus); printf("pciDev: %d\n", device->pciDev); printf("pciDom: %d\n", device->pciDom); + printf("pciFunc: %d\n", device->pciFunc); printf("numMultiProcs: %d\n", device->numMultiProcs); printf("maxThreadPerMultiProc: %d\n", device->maxThreadPerMultiProc); printf("memBusWidth: %d\n", device->memBusWidth);