diff --git a/doc/lua-doxygen.md b/doc/lua-doxygen.md
index fbddcd57b..7d9e5845c 100644
--- a/doc/lua-doxygen.md
+++ b/doc/lua-doxygen.md
@@ -3723,6 +3723,10 @@ The option 'n' takes an argument, specified by the ':'. If found the option argu
\a pciDom |
PCI domain identifier of the device |
+
+ \a pciFunc |
+ PCI function identifier of the device |
+
\a maxBlockRegs |
Maximum number of 32-bit registers available to a thread block |
diff --git a/src/applications/likwid-topology.lua b/src/applications/likwid-topology.lua
index 0be247897..436a80870 100644
--- a/src/applications/likwid-topology.lua
+++ b/src/applications/likwid-topology.lua
@@ -329,6 +329,7 @@ if likwid.nvSupported() then
table.insert(output_csv, string.format("PCI bus:\t\t0x%x", gpu["pciBus"]))
table.insert(output_csv, string.format("PCI domain:\t\t0x%x", gpu["pciDom"]))
table.insert(output_csv, string.format("PCI device:\t\t0x%x", gpu["pciDev"]))
+ table.insert(output_csv, string.format("PCI function:\t\t0x%x", gpu["pciFunc"]))
end
table.insert(output_csv, likwid.hline)
end
diff --git a/src/includes/likwid.h b/src/includes/likwid.h
index 0ac8f933a..66d0d5780 100644
--- a/src/includes/likwid.h
+++ b/src/includes/likwid.h
@@ -2195,6 +2195,7 @@ typedef struct {
int pciDev; /*!< \brief PCI device (also known as slot) identifier of the
device */
int pciDom; /*!< \brief PCI domain identifier of the device */
+ int pciFunc; /*!< \brief PCI function identifier of the device */
int maxBlockRegs; /*!< \brief Maximum number of 32-bit registers available to
a thread block */
int numMultiProcs; /*!< \brief Number of multiprocessors on the device */
diff --git a/src/likwid_device.c b/src/likwid_device.c
index 291db3918..ccd038792 100644
--- a/src/likwid_device.c
+++ b/src/likwid_device.c
@@ -551,10 +551,11 @@ int likwid_device_get_available(LikwidDeviceType type, char ***id_list)
{
#ifdef LIKWID_WITH_NVMON
case DEVICE_TYPE_NVIDIA_GPU:
- snprintf(id_str, sizeof(id_str), "GN:%08x:%02x:%02x.0",
+ snprintf(id_str, sizeof(id_str), "GN:%08x:%02x:%02x.%01x",
cuda_topo->devices[i].pciDom,
cuda_topo->devices[i].pciBus,
- cuda_topo->devices[i].pciDev);
+ cuda_topo->devices[i].pciDev,
+ cuda_topo->devices[i].pciFunc);
break;
#endif
#ifdef LIKWID_WITH_ROCMON
diff --git a/src/luawid.c b/src/luawid.c
index 993d653d8..4ce044ec3 100644
--- a/src/luawid.c
+++ b/src/luawid.c
@@ -2625,6 +2625,9 @@ static int lua_likwid_getCudaTopology(lua_State *L) {
lua_pushstring(L, "pciDom");
lua_pushinteger(L, (lua_Integer)(gpu->pciDom));
lua_settable(L, -3);
+ lua_pushstring(L, "pciFunc");
+ lua_pushinteger(L, (lua_Integer)(gpu->pciFunc));
+ lua_settable(L, -3);
lua_pushstring(L, "maxBlockRegs");
lua_pushinteger(L, (lua_Integer)(gpu->maxBlockRegs));
lua_settable(L, -3);
diff --git a/src/topology_cuda.c b/src/topology_cuda.c
index 32091c255..2d4f5ea99 100644
--- a/src/topology_cuda.c
+++ b/src/topology_cuda.c
@@ -304,6 +304,8 @@ topology_cuda_init()
CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].pciBus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, dev), ret = -ENOMEM; goto topology_gpu_init_error;);
CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].pciDev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, dev), ret = -ENOMEM; goto topology_gpu_init_error;);
CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].pciDom, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, dev), ret = -ENOMEM; goto topology_gpu_init_error;);
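+ // TODO: Get the PCI function by parsing the "domain:bus:device.function" string from nvmlDeviceGetPciInfo_v3 (nvmlPciInfo_t.busId; there is no separate function member) or from cuDeviceGetPCIBusId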
+ cudaTopology.devices[i].pciFunc = 0;
CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].maxBlockRegs, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, dev), ret = -ENOMEM; goto topology_gpu_init_error;);
CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].numMultiProcs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev), ret = -ENOMEM; goto topology_gpu_init_error;);
CU_CALL((*cuDeviceGetAttributeTopoPtr)(&cudaTopology.devices[i].maxThreadPerMultiProc, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, dev), ret = -ENOMEM; goto topology_gpu_init_error;);
diff --git a/test/test-topology-gpu-rocm.c b/test/test-topology-gpu-rocm.c
index 4b58687bf..a2784dbbb 100644
--- a/test/test-topology-gpu-rocm.c
+++ b/test/test-topology-gpu-rocm.c
@@ -45,6 +45,7 @@ int main()
printf("pciBus: %d\n", device->pciBus);
printf("pciDev: %d\n", device->pciDev);
printf("pciDom: %d\n", device->pciDom);
+ printf("pciFunc: %d\n", device->pciFunc);
printf("numMultiProcs: %d\n", device->numMultiProcs);
printf("maxThreadPerMultiProc: %d\n", device->maxThreadPerMultiProc);
printf("memBusWidth: %d\n", device->memBusWidth);