Skip to content

Commit

Permalink
hwdec_cuda: avoid gpu wakeup by deferring cuInit
Browse files Browse the repository at this point in the history
`cuInit` wakes up the NVIDIA dGPU on NVIDIA laptops. This is bad news because the wake-up process
is blocking and takes a few seconds. It also needlessly increases power consumption.

Sometimes, a VO (like `dmabuf_wayland`) loads several hwdecs. When `cuda` is loaded, it calls
`cuInit` before running any of the interop inits. However, the first checks in the interops do not
require CUDA initialization, so we only need to call `cuInit` after those checks.

This commit splits the interop `init` function into `check` and `init`. `check` can be called without
initializing the CUDA backend, so `cuInit` is only called *after* the first successful interop check.

With these changes, CUDA is not initialized if no suitable OpenGL/Vulkan backend is available. This prevents
`dmabuf_wayland` and other VOs which automatically load `cuda` from waking up the NVIDIA dGPU unnecessarily,
making them start faster and decreasing power consumption on laptops.

Fixes: #13668
  • Loading branch information
jrelvas-ipc committed May 2, 2024
1 parent b68c742 commit e9eaac6
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 33 deletions.
47 changes: 29 additions & 18 deletions video/out/hwdec/hwdec_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)

#define CHECK_CU(x) check_cu(hw, (x), #x)

static const cuda_interop_init interop_inits[] = {
static const struct cuda_interop_fn *interop_fns[] = {
#if HAVE_GL
cuda_gl_init,
&cuda_gl_fn,
#endif
#if HAVE_VULKAN
cuda_vk_init,
&cuda_vk_fn,
#endif
NULL
};
Expand All @@ -73,25 +73,36 @@ static int cuda_init(struct ra_hwdec *hw)
CUcontext dummy;
int ret = 0;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu;
CudaFunctions *cu = NULL;
int level = hw->probing ? MSGL_V : MSGL_ERR;

ret = cuda_load_functions(&p->cu, NULL);
if (ret != 0) {
MP_MSG(hw, level, "Failed to load CUDA symbols\n");
return -1;
}
cu = p->cu;

ret = CHECK_CU(cu->cuInit(0));
if (ret < 0)
return -1;
bool initialized = false;

// Initialise CUDA context from backend.
for (int i = 0; interop_inits[i]; i++) {
if (interop_inits[i](hw)) {
break;
// Note that the interop check doesn't require the CUDA backend to be initialized.
// This is important because cuInit wakes up the dgpu (even if the cuda hwdec won't be used!)
// Doing this allows us to check if CUDA should be used without waking up the dgpu, avoiding
// a few seconds of delay and improving battery life for laptops!
for (int i = 0; interop_fns[i]; i++) {
if (!interop_fns[i]->check(hw))
continue;

if (!initialized) {
ret = cuda_load_functions(&p->cu, NULL);
if (ret != 0) {
MP_MSG(hw, level, "Failed to load CUDA symbols\n");
return -1;
}

cu = p->cu;
ret = CHECK_CU(cu->cuInit(0));
if (ret < 0)
return -1;

initialized = true;
}

if (interop_fns[i]->init(hw))
break;
}

if (!p->ext_init || !p->ext_uninit) {
Expand Down
9 changes: 6 additions & 3 deletions video/out/hwdec/hwdec_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,13 @@ struct cuda_mapper_priv {
void *ext[4];
};

typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw);
// Per-backend interop entry points used by cuda_init() to bring up CUDA
// against the active rendering backend.
struct cuda_interop_fn {
// Reports whether this interop applies to hw's rendering backend. Must not
// depend on CUDA being initialized: cuda_init() calls it before cuInit so
// that an unusable backend never triggers CUDA initialization.
bool (*check)(const struct ra_hwdec *hw);
// Sets up the interop. cuda_init() only calls this after check() returned
// true and cuInit succeeded; a true return stops the search over interops.
bool (*init)(const struct ra_hwdec *hw);
};

bool cuda_gl_init(const struct ra_hwdec *hw);
extern struct cuda_interop_fn cuda_gl_fn;

bool cuda_vk_init(const struct ra_hwdec *hw);
extern struct cuda_interop_fn cuda_vk_fn;

int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func);
21 changes: 15 additions & 6 deletions video/out/hwdec/hwdec_cuda_gl.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,22 +106,26 @@ static void cuda_ext_gl_uninit(const struct ra_hwdec_mapper *mapper, int n)
#undef CHECK_CU
#define CHECK_CU(x) check_cu(hw, (x), #x)

bool cuda_gl_init(const struct ra_hwdec *hw) {
int ret = 0;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu = p->cu;

// Decide whether the OpenGL interop is usable for this hwdec without
// touching any CUDA function: the RA must be an OpenGL RA providing at
// least desktop OpenGL 2.1 or OpenGL ES 3.0.
static bool cuda_gl_check(const struct ra_hwdec *hw) {
    if (!ra_is_gl(hw->ra_ctx->ra)) {
        // This is not an OpenGL RA.
        return false;
    }

    GL *gl = ra_gl_get(hw->ra_ctx->ra);
    // Rejected only when both the desktop and the ES version are too old.
    if (gl->version < 210 && gl->es < 300) {
        MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
        return false;
    }

    return true;
}

static bool cuda_gl_init(const struct ra_hwdec *hw) {
int ret = 0;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu = p->cu;

CUdevice display_dev;
unsigned int device_count;
ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,
Expand Down Expand Up @@ -172,3 +176,8 @@ bool cuda_gl_init(const struct ra_hwdec *hw) {

return true;
}

// OpenGL interop entry points; referenced by address from the interop table
// in hwdec_cuda.c when HAVE_GL is set.
struct cuda_interop_fn cuda_gl_fn = {
.check = cuda_gl_check,
.init = cuda_gl_init
};
21 changes: 15 additions & 6 deletions video/out/hwdec/hwdec_cuda_vk.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,9 @@ static bool cuda_ext_vk_signal(const struct ra_hwdec_mapper *mapper, int n)
#undef CHECK_CU
#define CHECK_CU(x) check_cu(hw, (x), #x)

bool cuda_vk_init(const struct ra_hwdec *hw) {
int ret = 0;
int level = hw->probing ? MSGL_V : MSGL_ERR;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu = p->cu;

static bool cuda_vk_check(const struct ra_hwdec *hw) {
pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);

if (gpu != NULL) {
if (!(gpu->export_caps.tex & HANDLE_TYPE)) {
MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable texture memory of type 0x%X.\n",
Expand All @@ -293,6 +289,15 @@ bool cuda_vk_init(const struct ra_hwdec *hw) {
// This is not a Vulkan RA.
return false;
}
return true;
}

static bool cuda_vk_init(const struct ra_hwdec *hw) {
int ret = 0;
int level = hw->probing ? MSGL_V : MSGL_ERR;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu = p->cu;
pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);

if (!cu->cuImportExternalMemory) {
MP_MSG(hw, level, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n");
Expand Down Expand Up @@ -342,3 +347,7 @@ bool cuda_vk_init(const struct ra_hwdec *hw) {
return true;
}

// Vulkan interop entry points; referenced by address from the interop table
// in hwdec_cuda.c when HAVE_VULKAN is set.
struct cuda_interop_fn cuda_vk_fn = {
.check = cuda_vk_check,
.init = cuda_vk_init
};

0 comments on commit e9eaac6

Please sign in to comment.