Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid gpu wakeup in hwdec_cuda by deferring cuInit #14028

Merged
merged 2 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
47 changes: 29 additions & 18 deletions video/out/hwdec/hwdec_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)

// Passes the result of CUDA call `x` to check_cu(), together with the
// stringified expression `#x` as the `func` argument. Expands to a reference
// to a variable named `hw`, so one must be in scope at the point of use.
#define CHECK_CU(x) check_cu(hw, (x), #x)

static const cuda_interop_init interop_inits[] = {
static const struct cuda_interop_fn *interop_fns[] = {
sfan5 marked this conversation as resolved.
Show resolved Hide resolved
#if HAVE_GL
cuda_gl_init,
&cuda_gl_fn,
#endif
#if HAVE_VULKAN
cuda_vk_init,
&cuda_vk_fn,
#endif
NULL
};
Expand All @@ -73,25 +73,36 @@ static int cuda_init(struct ra_hwdec *hw)
CUcontext dummy;
int ret = 0;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu;
CudaFunctions *cu = NULL;
int level = hw->probing ? MSGL_V : MSGL_ERR;

ret = cuda_load_functions(&p->cu, NULL);
if (ret != 0) {
MP_MSG(hw, level, "Failed to load CUDA symbols\n");
return -1;
}
cu = p->cu;

ret = CHECK_CU(cu->cuInit(0));
if (ret < 0)
return -1;
bool initialized = false;

// Initialise CUDA context from backend.
for (int i = 0; interop_inits[i]; i++) {
if (interop_inits[i](hw)) {
break;
// Note that the interop check doesn't require the CUDA backend to be initialized.
// This is important because cuInit wakes up the dGPU (even if the CUDA hwdec won't be used!).
// Running the check first lets us decide whether CUDA should be used without waking up the
// dGPU, avoiding a few seconds of delay and improving battery life on laptops.
for (int i = 0; interop_fns[i]; i++) {
if (!interop_fns[i]->check(hw))
continue;

if (!initialized) {
ret = cuda_load_functions(&p->cu, NULL);
if (ret != 0) {
MP_MSG(hw, level, "Failed to load CUDA symbols\n");
return -1;
}

cu = p->cu;
ret = CHECK_CU(cu->cuInit(0));
if (ret < 0)
return -1;

initialized = true;
}

if (interop_fns[i]->init(hw))
break;
}

if (!p->ext_init || !p->ext_uninit) {
Expand Down
9 changes: 6 additions & 3 deletions video/out/hwdec/hwdec_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,13 @@ struct cuda_mapper_priv {
void *ext[4];
};

typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw);
// Pair of callbacks that one graphics-API interop backend exposes to
// cuda_init(), which walks a NULL-terminated list of these.
struct cuda_interop_fn {
// Returns true if this backend is usable with the RA behind `hw`.
// cuda_init() calls this before the CUDA symbols are loaded and before
// cuInit() runs, so it must not depend on CUDA being initialized.
bool (*check)(const struct ra_hwdec *hw);
// Sets up the interop for `hw`. cuda_init() calls this only after check()
// returned true and cuInit() succeeded; a true return ends the backend search.
bool (*init)(const struct ra_hwdec *hw);
};

bool cuda_gl_init(const struct ra_hwdec *hw);
extern struct cuda_interop_fn cuda_gl_fn;

bool cuda_vk_init(const struct ra_hwdec *hw);
extern struct cuda_interop_fn cuda_vk_fn;

// Evaluates the CUresult `err` produced by the call named in `func`
// (the CHECK_CU macro passes the stringified call expression here).
// Callers in this code treat a return value < 0 as failure.
// NOTE(review): the exact return values and logging behavior are defined
// outside this view - confirm against the implementation.
int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func);
31 changes: 19 additions & 12 deletions video/out/hwdec/hwdec_cuda_gl.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,22 +106,24 @@ static void cuda_ext_gl_uninit(const struct ra_hwdec_mapper *mapper, int n)
// Drop the previous CHECK_CU definition and redefine it for the functions
// below, which receive `hw` directly as a parameter.
// NOTE(review): the earlier definition is outside this view - presumably it
// obtains `hw` differently; confirm.
#undef CHECK_CU
#define CHECK_CU(x) check_cu(hw, (x), #x)

bool cuda_gl_init(const struct ra_hwdec *hw) {
int ret = 0;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu = p->cu;
static bool cuda_gl_check(const struct ra_hwdec *hw) {
if (!ra_is_gl(hw->ra_ctx->ra))
return false; // This is not an OpenGL RA.

if (ra_is_gl(hw->ra_ctx->ra)) {
GL *gl = ra_gl_get(hw->ra_ctx->ra);
if (gl->version < 210 && gl->es < 300) {
MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
return false;
}
} else {
// This is not an OpenGL RA.
GL *gl = ra_gl_get(hw->ra_ctx->ra);
if (gl->version < 210 && gl->es < 300) {
MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
return false;
}

return true;
}

static bool cuda_gl_init(const struct ra_hwdec *hw) {
int ret = 0;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu = p->cu;

CUdevice display_dev;
unsigned int device_count;
ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,
Expand Down Expand Up @@ -172,3 +174,8 @@ bool cuda_gl_init(const struct ra_hwdec *hw) {

return true;
}

// OpenGL interop callbacks. cuda_gl_check and cuda_gl_init are static, so this
// table is how other translation units reach them.
struct cuda_interop_fn cuda_gl_fn = {
.check = cuda_gl_check,
.init = cuda_gl_init
};
39 changes: 23 additions & 16 deletions video/out/hwdec/hwdec_cuda_vk.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,27 +272,30 @@ static bool cuda_ext_vk_signal(const struct ra_hwdec_mapper *mapper, int n)
// Drop the previous CHECK_CU definition and redefine it for the functions
// below, which receive `hw` directly as a parameter.
// NOTE(review): the earlier definition is outside this view - presumably it
// obtains `hw` differently; confirm.
#undef CHECK_CU
#define CHECK_CU(x) check_cu(hw, (x), #x)

bool cuda_vk_init(const struct ra_hwdec *hw) {
/*
 * Reports whether the Vulkan interop can be used with the RA behind `hw`.
 *
 * Only inspects the pl_gpu export capabilities; no CUDA function is called
 * here. Returns false when the RA yields no pl_gpu, or when either textures
 * or semaphores cannot be exported as HANDLE_TYPE (a verbose message names
 * the missing capability). Returns true otherwise.
 */
static bool cuda_vk_check(const struct ra_hwdec *hw) {
    pl_gpu pl = ra_pl_get(hw->ra_ctx->ra);
    if (!pl) {
        // This is not a Vulkan RA.
        return false;
    }

    // Texture memory must be exportable with the handle type used for interop.
    if ((pl->export_caps.tex & HANDLE_TYPE) == 0) {
        MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable texture memory of type 0x%X.\n",
                   HANDLE_TYPE);
        return false;
    }

    // The same applies to the semaphores.
    if ((pl->export_caps.sync & HANDLE_TYPE) == 0) {
        MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable semaphores of type 0x%X.\n",
                   HANDLE_TYPE);
        return false;
    }

    return true;
}

static bool cuda_vk_init(const struct ra_hwdec *hw) {
int ret = 0;
int level = hw->probing ? MSGL_V : MSGL_ERR;
struct cuda_hw_priv *p = hw->priv;
CudaFunctions *cu = p->cu;

pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);
if (gpu != NULL) {
if (!(gpu->export_caps.tex & HANDLE_TYPE)) {
MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable texture memory of type 0x%X.\n",
HANDLE_TYPE);
return false;
} else if (!(gpu->export_caps.sync & HANDLE_TYPE)) {
MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable semaphores of type 0x%X.\n",
HANDLE_TYPE);
return false;
}
} else {
// This is not a Vulkan RA.
return false;
}

if (!cu->cuImportExternalMemory) {
MP_MSG(hw, level, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n");
Expand Down Expand Up @@ -342,3 +345,7 @@ bool cuda_vk_init(const struct ra_hwdec *hw) {
return true;
}

// Vulkan interop callbacks. cuda_vk_check and cuda_vk_init are static, so this
// table is how other translation units reach them.
struct cuda_interop_fn cuda_vk_fn = {
.check = cuda_vk_check,
.init = cuda_vk_init
};