hwdec_cuda: Avoid gpu wakeup by deferring cuInit

`cuInit` wakes up the nvidia dgpu on nvidia laptops. This is bad news because the wake up process is blocking and takes a few seconds. It also needlessly increases power consumption. Sometimes, a VO loads several hwdecs (like `dmabuf_wayland`). When `cuda` is loaded, it calls `cuInit` before running all interop inits. However, the first checks in the interops do not require cuda initialization, so we only need to call `cuInit` after those checks. `cuInit` is handled by the new `cuda_priv_init` function. It ensures `cuInit` is only called once. With these changes, there's no cuda initialization if no OpenGL/Vulkan backend is available. This prevents `dmabuf_wayland` and other VOs which automatically load cuda from waking up the nvidia dgpu unnecessarily, making them start faster and decreasing power consumption on laptops. Fixes: #13668 Signed-off-by: Jrelvas <[email protected]>
mpv-player · Apr 30, 2024 · 10b6961 · 10b6961
1 parent b68c742
commit 10b6961
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 4 deletions.
diff --git a/video/out/hwdec/hwdec_cuda.c b/video/out/hwdec/hwdec_cuda.c
@@ -57,6 +57,25 @@ int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)
 
 #define CHECK_CU(x) check_cu(hw, (x), #x)
 
+// Lazily runs cuInit for this hwdec. cuInit wakes up nvidia dgpus from sleep,
+// which blocks for a few seconds and increases power consumption, so interops
+// call this only after their cheap checks, letting us bail out early.
+// Returns 0 if cuInit already succeeded, else the CHECK_CU result (< 0 is failure).
+int cuda_priv_init(const struct ra_hwdec *hw)
+{
+    struct cuda_hw_priv *p = hw->priv;
+
+    if (p->initialized)
+        return 0;
+
+    int ret = CHECK_CU(p->cu->cuInit(0));
+    // Latch only on success: a failed cuInit must not make later calls
+    // (e.g. from the next interop) report success.
+    if (ret >= 0)
+        p->initialized = true;
+    return ret;
+}
+
 static const cuda_interop_init interop_inits[] = {
 #if HAVE_GL
  cuda_gl_init,
@@ -83,10 +102,6 @@ static int cuda_init(struct ra_hwdec *hw)
  }
  cu = p->cu;
 
- ret = CHECK_CU(cu->cuInit(0));
- if (ret < 0)
- return -1;
-
  // Initialise CUDA context from backend.
  for (int i = 0; interop_inits[i]; i++) {
  if (interop_inits[i](hw)) {

diff --git a/video/out/hwdec/hwdec_cuda.h b/video/out/hwdec/hwdec_cuda.h
@@ -31,6 +31,7 @@ struct cuda_hw_priv {
 
  // Do we need to do a full CPU sync after copying
  bool do_full_sync;
+ bool initialized;
 
  bool (*ext_init)(struct ra_hwdec_mapper *mapper,
  const struct ra_format *format, int n);
@@ -52,6 +53,8 @@ struct cuda_mapper_priv {
 
 typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw);
 
+int cuda_priv_init(const struct ra_hwdec *hw);
+
 bool cuda_gl_init(const struct ra_hwdec *hw);
 
 bool cuda_vk_init(const struct ra_hwdec *hw);

diff --git a/video/out/hwdec/hwdec_cuda_gl.c b/video/out/hwdec/hwdec_cuda_gl.c
@@ -122,6 +122,10 @@ bool cuda_gl_init(const struct ra_hwdec *hw) {
  return false;
  }
 
+ ret = cuda_priv_init(hw);
+ if (ret < 0)
+ return false;
+
  CUdevice display_dev;
  unsigned int device_count;
  ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,

diff --git a/video/out/hwdec/hwdec_cuda_vk.c b/video/out/hwdec/hwdec_cuda_vk.c
@@ -294,6 +294,10 @@ bool cuda_vk_init(const struct ra_hwdec *hw) {
  return false;
  }
 
+ ret = cuda_priv_init(hw);
+ if (ret < 0)
+ return false;
+
  if (!cu->cuImportExternalMemory) {
  MP_MSG(hw, level, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n");
  return false;