Skip to content

Commit

Permalink
Add likwid_device_get_all function
Browse files Browse the repository at this point in the history
The shared implementation of this new function and
likwid_device_get_available currently does not work properly and needs a
fix in a later commit.
  • Loading branch information
ipatix committed Nov 15, 2024
1 parent 8bff513 commit 9777199
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 10 deletions.
21 changes: 20 additions & 1 deletion src/includes/likwid.h
Original file line number Diff line number Diff line change
Expand Up @@ -3379,14 +3379,33 @@ const char *likwid_device_type_name(LikwidDeviceType type) __attribute__ ((visib
@param [in] device Likwid device
*/
void likwid_device_fmt_pci(char *buf, size_t size, LikwidDevice_t device) __attribute__ ((visibility ("default") ));
/*! \brief Get available device strings. If an error occurs, the out parameters are kept unchanged.
/*! \brief Get available device strings.
Get a list of available devices. The strings in the list returned do not contain
the device type prefix (e.g. "N:", "C:", etc.).
A device is considered available if it belongs to the current CPUset.
In order to get a list of all devices, use 'likwid_device_get_all' instead.
@param [in] type Likwid device type
@param [out] id_list String list of all available devices for the type specified
@param [out] id_list_count Number of list entries
@return error code (<0 on failure)
*/
int likwid_device_get_available(LikwidDeviceType type, char ***id_list, size_t *id_list_count) __attribute__((visibility ("default") ));
/*! \brief Get all device strings.
Get a list of all devices. The strings in the list returned do not contain
the device type prefix (e.g. "N:", "C:", etc.).
This function returns all devices regardless of the current CPUset.
In order to get a list of only available devices, use
'likwid_device_get_available' instead.
@param [in] type Likwid device type
@param [out] id_list String list of all devices for the type specified
@param [out] id_list_count Number of list entries
@return error code (<0 on failure)
*/
int likwid_device_get_all(LikwidDeviceType type, char ***id_list, size_t *id_list_count) __attribute__((visibility ("default") ));

/** @}*/

Expand Down
110 changes: 101 additions & 9 deletions src/likwid_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <unistd.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include <likwid.h>
#include <topology.h>
Expand Down Expand Up @@ -476,8 +477,65 @@ void likwid_device_fmt_pci(char *buf, size_t size, LikwidDevice_t device)
snprintf(buf, size, "%08x:%02x:%02x.%01x", dom, bus, dev, func);
}

int likwid_device_get_available(LikwidDeviceType type, char ***id_list, size_t *id_list_count)
/* Check whether the CPU-side device (type, id) is covered by the current CPUset.
 *
 * The hardware thread pool of the CPU topology is scanned and the result
 * depends on the device type:
 *  - DEVICE_TYPE_HWTHREAD: the inCpuSet flag of the first thread whose apicId
 *    equals id.
 *  - DEVICE_TYPE_CORE / DEVICE_TYPE_DIE / DEVICE_TYPE_SOCKET: true as soon as
 *    one thread with a matching coreId / dieId / packageId has inCpuSet set.
 *  - DEVICE_TYPE_NUMA: true as soon as one thread listed in the processors of
 *    NUMA node 'id' has inCpuSet set.
 *  - DEVICE_TYPE_NODE: true (as long as there is at least one hardware thread).
 *  - any other type: a debug message is printed and false is returned.
 *
 * GPU device types must not be passed in; this is asserted when the respective
 * backend is compiled in.
 *
 * @param type Likwid device type
 * @param id   Numeric device id, interpreted according to type
 * @return true if the device is in the CPUset, false otherwise (including
 *         unknown ids and unsupported types)
 */
static bool device_in_cpuset(LikwidDeviceType type, size_t id)
{
#ifdef LIKWID_WITH_NVMON
    assert(type != DEVICE_TYPE_NVIDIA_GPU);
#endif
#ifdef LIKWID_WITH_ROCMON
    assert(type != DEVICE_TYPE_AMD_GPU);
#endif

    CpuTopology_t cpu_topo = get_cpuTopology();
    NumaTopology_t numa_topo = get_numaTopology();

    for (size_t i = 0; i < cpu_topo->numHWThreads; i++)
    {
        const HWThread *t = &cpu_topo->threadPool[i];
        switch (type)
        {
        case DEVICE_TYPE_HWTHREAD:
            if (t->apicId == id)
                return t->inCpuSet;
            break;
        case DEVICE_TYPE_CORE:
            if (t->coreId == id && t->inCpuSet)
                return true;
            break;
        case DEVICE_TYPE_NUMA:
        {
            assert(id < numa_topo->numberOfNodes);
            /* The assert vanishes with NDEBUG, so also guard at runtime
             * instead of indexing past the end of the node array. */
            if (id >= numa_topo->numberOfNodes)
                return false;

            const NumaNode *n = &numa_topo->nodes[id];
            for (size_t j = 0; j < n->numberOfProcessors; j++)
            {
                if (n->processors[j] == t->apicId && t->inCpuSet)
                    return true;
            }
            break;
        }
        case DEVICE_TYPE_DIE:
            if (t->dieId == id && t->inCpuSet)
                return true;
            break;
        case DEVICE_TYPE_SOCKET:
            if (t->packageId == id && t->inCpuSet)
                return true;
            break;
        case DEVICE_TYPE_NODE:
            return true;
        default:
            DEBUG_PRINT(DEBUGLEV_DEVELOP, Unimplemented device type: %d, type);
            return false;
        }
    }

    /* No hardware thread matched the requested device. */
    return false;
}

static int likwid_device_get_list(LikwidDeviceType type, char ***id_list, size_t *id_list_count, bool cpuset_only)
{
/* There seems to be a bug in this function, fix in a later commit. */
ERROR_PRINT(not implemented);
return -EPERM;

if (type <= DEVICE_TYPE_INVALID || type >= MAX_DEVICE_TYPE || !id_list)
return -EINVAL;

Expand All @@ -494,33 +552,26 @@ int likwid_device_get_available(LikwidDeviceType type, char ***id_list, size_t *
RocmTopology_t rocm_topo = get_rocmTopology();
#endif

const char *id_prefix = NULL;
size_t id_count = 0;

switch (type)
{
case DEVICE_TYPE_HWTHREAD:
id_prefix = "T";
id_count = cpu_topo->numHWThreads;
break;
case DEVICE_TYPE_CORE:
id_prefix = "C";
id_count = cpu_topo->numCoresPerSocket * cpu_topo->numSockets;
break;
case DEVICE_TYPE_NUMA:
id_prefix = "M";
id_count = numa_topo->numberOfNodes;
break;
case DEVICE_TYPE_DIE:
id_prefix = "D";
id_count = cpu_topo->numDies;
break;
case DEVICE_TYPE_SOCKET:
id_prefix = "S";
id_count = cpu_topo->numSockets;
break;
case DEVICE_TYPE_NODE:
id_prefix = "N";
id_count = 1;
break;
#ifdef LIKWID_WITH_NVMON
Expand Down Expand Up @@ -567,7 +618,10 @@ int likwid_device_get_available(LikwidDeviceType type, char ***id_list, size_t *
break;
#endif
default:
snprintf(id_str, sizeof(id_str), "%s:%zu", id_prefix, i);
if (cpuset_only && !device_in_cpuset(type, i))
continue;
snprintf(id_str, sizeof(id_str), "%zu", i);
break;
}

name_list[i] = strdup(id_str);
Expand All @@ -586,7 +640,45 @@ int likwid_device_get_available(LikwidDeviceType type, char ***id_list, size_t *
return err;
}

/* Compact the result when filtering by CPUset: devices rejected by the filter
 * were skipped via 'continue' before the strdup() assignment, so their slot
 * holds no string (assumes name_list was zero-initialized on allocation, the
 * allocation is not visible here -- TODO confirm). */
if (cpuset_only)
{
/* Shrink list again and remove all entries which were left blank. */
/* write_index trails read_index and counts the entries kept so far. */
size_t write_index = 0;
for (size_t read_index = 0; read_index < id_count; read_index++)
{
if (!name_list[read_index])
continue;

name_list[write_index++] = name_list[read_index];
}

/* Clear the tail so that no string pointer is stored twice in the array. */
for (size_t i = write_index; i < id_count; i++)
name_list[i] = NULL;

/* NOTE(review): if no device passed the filter, write_index is 0 and this
 * becomes a zero-size realloc. Its result is implementation-defined in C11
 * (undefined in C23); an implementation may free name_list and return NULL,
 * in which case the error path below would use and free name_list again and
 * errno need not be set. Confirm and guard the write_index == 0 case. */
char **shrink_name_list = realloc(name_list, write_index * sizeof(name_list[0]));
if (!shrink_name_list)
{
/* On failure the original array is still owned here: release the strings
 * and the array, then report the errno value as a negative error code. */
for (size_t i = 0; i < id_count; i++)
free(name_list[i]);
free(name_list);
return -errno;
}

/* Continue with the shrunken array and the number of entries kept. */
name_list = shrink_name_list;
id_count = write_index;
}

*id_list = name_list;
*id_list_count = id_count;
return 0;
}

/* Public entry point for the list of available devices: forwards to
 * likwid_device_get_list with the CPUset filter enabled and hands its return
 * code back unchanged. */
int likwid_device_get_available(LikwidDeviceType type, char ***id_list, size_t *id_list_count)
{
    const bool restrict_to_cpuset = true;
    const int err = likwid_device_get_list(type, id_list, id_list_count, restrict_to_cpuset);
    return err;
}

/* Public entry point for the list of all devices: forwards to
 * likwid_device_get_list with the CPUset filter disabled and hands its return
 * code back unchanged. */
int likwid_device_get_all(LikwidDeviceType type, char ***id_list, size_t *id_list_count)
{
    const bool restrict_to_cpuset = false;
    const int err = likwid_device_get_list(type, id_list, id_list_count, restrict_to_cpuset);
    return err;
}

0 comments on commit 9777199

Please sign in to comment.