Skip to content

Commit

Permalink
hwdec_cuda: Avoid gpu wakeup by deferring cuInit
Browse files Browse the repository at this point in the history
`cuInit` wakes up the nvidia dgpu on nvidia laptops. This is bad news because the wake up process
is blocking and takes a few seconds. It also needlessly increases power consumption.

Some VOs (like `dmabuf_wayland`) load several hwdecs. When `cuda` is loaded, it calls
`cuInit` before running all interop inits. However, the first checks in the interops do not
require cuda initialization, so we only need to call `cuInit` after those checks.

`cuInit` is handled by the new `cuda_priv_init` function. It ensures `cuInit` is only called once.

With these changes, there's no cuda initialization if no OpenGL/Vulkan backend is available. This prevents
`dmabuf_wayland` and other VOs which automatically load cuda from waking up the nvidia dgpu unnecessarily,
making them start faster and decreasing power consumption on laptops.

Fixes: #13668

Signed-off-by: Jrelvas <[email protected]>
  • Loading branch information
jrelvas-ipc committed Apr 30, 2024
1 parent b68c742 commit 10b6961
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 4 deletions.
23 changes: 19 additions & 4 deletions video/out/hwdec/hwdec_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,25 @@ int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)

#define CHECK_CU(x) check_cu(hw, (x), #x)

// Lazily initialise the CUDA driver API for this hwdec.
//
// Unfortunately, calling cuInit wakes up nvidia dgpus from sleep. That's a
// blocking operation which can take a few seconds, and it also increases power
// consumption. Avoid this by delaying the call until absolutely necessary, so
// that interops can bail out of using the hwdec early without waking the gpu.
//
// Returns 0 immediately if a previous call already initialised CUDA. Otherwise
// returns the CHECK_CU() result of cuInit; callers treat a value < 0 as failure.
int cuda_priv_init(const struct ra_hwdec *hw)
{
    struct cuda_hw_priv *p = hw->priv;
    CudaFunctions *cu = p->cu;

    if (p->initialized)
        return 0;

    int ret = CHECK_CU(cu->cuInit(0));
    if (ret < 0) {
        // Do not latch a failed attempt: otherwise the next caller would take
        // the early-return path above and be told that CUDA is usable.
        return ret;
    }

    p->initialized = true;
    return ret;
}

static const cuda_interop_init interop_inits[] = {
#if HAVE_GL
cuda_gl_init,
Expand All @@ -83,10 +102,6 @@ static int cuda_init(struct ra_hwdec *hw)
}
cu = p->cu;

ret = CHECK_CU(cu->cuInit(0));
if (ret < 0)
return -1;

// Initialise CUDA context from backend.
for (int i = 0; interop_inits[i]; i++) {
if (interop_inits[i](hw)) {
Expand Down
3 changes: 3 additions & 0 deletions video/out/hwdec/hwdec_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct cuda_hw_priv {

// Do we need to do a full CPU sync after copying
bool do_full_sync;
bool initialized;

bool (*ext_init)(struct ra_hwdec_mapper *mapper,
const struct ra_format *format, int n);
Expand All @@ -52,6 +53,8 @@ struct cuda_mapper_priv {

// Signature of a backend interop initialiser. cuda_init() walks an array of
// these and calls each one with the hwdec being set up.
typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw);

// Deferred cuInit() wrapper. Once the hwdec is marked initialised it returns 0
// without calling cuInit() again. Callers treat a return value < 0 as failure.
int cuda_priv_init(const struct ra_hwdec *hw);

// OpenGL interop initialiser; returns false if it cannot be used
// (including when cuda_priv_init() fails).
bool cuda_gl_init(const struct ra_hwdec *hw);

// Vulkan interop initialiser; returns false if it cannot be used
// (including when cuda_priv_init() fails).
bool cuda_vk_init(const struct ra_hwdec *hw);
Expand Down
4 changes: 4 additions & 0 deletions video/out/hwdec/hwdec_cuda_gl.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ bool cuda_gl_init(const struct ra_hwdec *hw) {
return false;
}

ret = cuda_priv_init(hw);
if (ret < 0)
return false;

CUdevice display_dev;
unsigned int device_count;
ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,
Expand Down
4 changes: 4 additions & 0 deletions video/out/hwdec/hwdec_cuda_vk.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,10 @@ bool cuda_vk_init(const struct ra_hwdec *hw) {
return false;
}

ret = cuda_priv_init(hw);
if (ret < 0)
return false;

if (!cu->cuImportExternalMemory) {
MP_MSG(hw, level, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n");
return false;
Expand Down

0 comments on commit 10b6961

Please sign in to comment.