Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

changes to support CUDA build w/o GPU in runtime #13674

30 changes: 17 additions & 13 deletions src/linux/backend-hid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "types.h"

#include <rsutils/string/from.h>
#include "rsutils/accelerators/gpu.h"

#include <thread>
#include <chrono>
Expand Down Expand Up @@ -1232,23 +1233,26 @@ namespace librealsense
for (auto& elem : common_sensors)
{
hid_device_info hid_dev_info{};
if(!get_hid_device_info(elem.c_str(), hid_dev_info))
if (!get_hid_device_info(elem.c_str(), hid_dev_info))
{
#ifdef RS2_USE_CUDA
/* On the Jetson TX, ina3221x is the power monitor (I2C bus)
This code is checking the IIA device directory, but tries to compare as USB HID device
The ina3221x is not a HID device. Check here to avoid spamming the console.
Patch suggested by JetsonHacks: https://github.com/jetsonhacks/buildLibrealsense2TX */
std::string device_path_str(elem.c_str());
device_path_str+="/";
std::string dev_name;
std::ifstream(device_path_str + "name") >> dev_name;
if (dev_name != std::string("ina3221x")) {
LOG_WARNING("Failed to read busnum/devnum. Device Path: " << elem);
if (rsutils::rs2_is_gpu_available())
{
/* On the Jetson TX, ina3221x is the power monitor (I2C bus)
This code is checking the IIA device directory, but tries to compare as USB HID device
The ina3221x is not a HID device. Check here to avoid spamming the console.
Patch suggested by JetsonHacks: https://github.com/jetsonhacks/buildLibrealsense2TX */
std::string device_path_str(elem.c_str());
device_path_str += "/";
std::string dev_name;
std::ifstream(device_path_str + "name") >> dev_name;
if (dev_name != std::string("ina3221x")) {
LOG_WARNING("Failed to read busnum/devnum. Device Path: " << elem);
}
continue;
}
#else
LOG_INFO("Failed to read busnum/devnum. Device Path: " << elem);
#endif
LOG_INFO("Failed to read busnum/devnum. Device Path: " << elem);
continue;
}
action(hid_dev_info);
Expand Down
11 changes: 8 additions & 3 deletions src/linux/backend-v4l2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@

#include <sys/signalfd.h>
#include <signal.h>
#include "rsutils/accelerators/gpu.h"

#pragma GCC diagnostic ignored "-Woverflow"

const size_t MAX_DEV_PARENT_DIR = 10;
Expand Down Expand Up @@ -755,9 +757,12 @@ namespace librealsense
if (!is_usb_path_valid(video_path, dev_name, busnum, devnum, devpath))
{
#ifndef RS2_USE_CUDA
/* On the Jetson TX, the camera module is CSI & I2C and does not report as this code expects
Patch suggested by JetsonHacks: https://github.com/jetsonhacks/buildLibrealsense2TX */
LOG_INFO("Failed to read busnum/devnum. Device Path: " << ("/sys/class/video4linux/" + name));
if (rsutils::rs2_is_gpu_available())
{
/* On the Jetson TX, the camera module is CSI & I2C and does not report as this code expects
Patch suggested by JetsonHacks: https://github.com/jetsonhacks/buildLibrealsense2TX */
LOG_INFO("Failed to read busnum/devnum. Device Path: " << ("/sys/class/video4linux/" + name));
}
#endif
throw linux_backend_exception("Failed to read busnum/devnum of usb device");
}
Expand Down
10 changes: 8 additions & 2 deletions src/proc/align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

#if defined(RS2_USE_CUDA)
#include "proc/cuda/cuda-align.h"
#elif defined(__SSSE3__)
#include "rsutils/accelerators/gpu.h"
#endif
#if defined(__SSSE3__)
#include "proc/sse/sse-align.h"
#endif
#include "proc/neon/neon-align.h"
Expand All @@ -25,8 +27,12 @@ namespace librealsense
std::shared_ptr<align> align::create_align(rs2_stream align_to)
{
#if defined(RS2_USE_CUDA)
if (rsutils::rs2_is_gpu_available())
{
return std::make_shared<librealsense::align_cuda>(align_to);
#elif defined(__SSSE3__)
}
#endif
#if defined(__SSSE3__)
return std::make_shared<librealsense::align_sse>(align_to);
#elif defined(__ARM_NEON) && ! defined(ANDROID)
return std::make_shared<librealsense::align_neon>(align_to);
Expand Down
8 changes: 6 additions & 2 deletions src/proc/color-formats-converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#ifdef RS2_USE_CUDA
#include "cuda/cuda-conversion.cuh"
#include "rsutils/accelerators/gpu.h"
#endif
#ifdef __SSSE3__
#include <tmmintrin.h> // For SSSE3 intrinsics
Expand Down Expand Up @@ -57,8 +58,11 @@ namespace librealsense
auto n = width * height;
assert(n % 16 == 0); // All currently supported color resolutions are multiples of 16 pixels. Could easily extend support to other resolutions by copying final n<16 pixels into a zero-padded buffer and recursively calling self for final iteration.
#ifdef RS2_USE_CUDA
rscuda::unpack_yuy2_cuda<FORMAT>(d, s, n);
return;
if (rsutils::rs2_is_gpu_available())
{
rscuda::unpack_yuy2_cuda<FORMAT>(d, s, n);
return;
}
#endif
#if defined __SSSE3__ && ! defined ANDROID
static bool do_avx = has_avx();
Expand Down
23 changes: 15 additions & 8 deletions src/proc/depth-formats-converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#ifdef RS2_USE_CUDA
#include "cuda/cuda-conversion.cuh"
#include "rsutils/accelerators/gpu.h"
#endif

namespace librealsense
Expand All @@ -17,11 +18,14 @@ namespace librealsense
auto in = reinterpret_cast<const uint16_t*>(source);
auto out_ir = reinterpret_cast<uint8_t *>(dest[1]);
#ifdef RS2_USE_CUDA
rscuda::unpack_z16_y8_from_sr300_inzi_cuda(out_ir, in, count);
in += count;
#else
for (int i = 0; i < count; ++i) *out_ir++ = *in++ >> 2;
if (rsutils::rs2_is_gpu_available())
{
rscuda::unpack_z16_y8_from_sr300_inzi_cuda(out_ir, in, count);
in += count;
}
else
#endif
for (int i = 0; i < count; ++i) *out_ir++ = *in++ >> 2;
Nir-Az marked this conversation as resolved.
Show resolved Hide resolved
std::memcpy( dest[0], in, count * 2 );
}

Expand All @@ -31,11 +35,14 @@ namespace librealsense
auto in = reinterpret_cast<const uint16_t*>(source);
auto out_ir = reinterpret_cast<uint16_t*>(dest[1]);
#ifdef RS2_USE_CUDA
rscuda::unpack_z16_y16_from_sr300_inzi_cuda(out_ir, in, count);
in += count;
#else
for (int i = 0; i < count; ++i) *out_ir++ = *in++ << 6;
if (rsutils::rs2_is_gpu_available())
{
rscuda::unpack_z16_y16_from_sr300_inzi_cuda(out_ir, in, count);
in += count;
}
else
#endif
for (int i = 0; i < count; ++i) *out_ir++ = *in++ << 6;
Nir-Az marked this conversation as resolved.
Show resolved Hide resolved
std::memcpy( dest[0], in, count * 2 );
}

Expand Down
7 changes: 6 additions & 1 deletion src/proc/pointcloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#ifdef RS2_USE_CUDA
#include "proc/cuda/cuda-pointcloud.h"
#include "rsutils/accelerators/gpu.h"
#endif
#ifdef __SSSE3__
#include "proc/sse/sse-pointcloud.h"
Expand Down Expand Up @@ -396,8 +397,12 @@ namespace librealsense
std::shared_ptr<pointcloud> pointcloud::create()
{
#ifdef RS2_USE_CUDA
if (rsutils::rs2_is_gpu_available())
{
return std::make_shared<librealsense::pointcloud_cuda>();
#elif defined(__SSSE3__)
}
#endif
#ifdef __SSSE3__
return std::make_shared<librealsense::pointcloud_sse>();
#elif defined(__ARM_NEON) && ! defined ANDROID
return std::make_shared<librealsense::pointcloud_neon>();
Expand Down
10 changes: 7 additions & 3 deletions src/proc/y12i-to-y16y16-mipi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "stream.h"
#ifdef RS2_USE_CUDA
#include "cuda/cuda-conversion.cuh"
#include "rsutils/accelerators/gpu.h"
#endif

namespace librealsense
Expand All @@ -16,12 +17,15 @@ namespace librealsense
{
auto count = width * height;
#ifdef RS2_USE_CUDA
rscuda::split_frame_y16_y16_from_y12i_cuda(dest, count, reinterpret_cast<const rscuda::y12i_pixel_mipi *>(source));
#else
if (rsutils::rs2_is_gpu_available())
{
rscuda::split_frame_y16_y16_from_y12i_cuda(dest, count, reinterpret_cast<const rscuda::y12i_pixel_mipi*>(source));
return;
}
#endif
split_frame(dest, count, reinterpret_cast<const y12i_pixel_mipi*>(source),
Nir-Az marked this conversation as resolved.
Show resolved Hide resolved
[](const y12i_pixel_mipi& p) -> uint16_t { return p.l() << 6 | p.l() >> 4; }, // We want to convert 10-bit data to 16-bit data
[](const y12i_pixel_mipi& p) -> uint16_t { return p.r() << 6 | p.r() >> 4; }); // Multiply by 64 1/16 to efficiently approximate 65535/1023
#endif
}

y12i_to_y16y16_mipi::y12i_to_y16y16_mipi(int left_idx, int right_idx)
Expand Down
10 changes: 7 additions & 3 deletions src/proc/y12i-to-y16y16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "stream.h"
#ifdef RS2_USE_CUDA
#include "cuda/cuda-conversion.cuh"
#include "rsutils/accelerators/gpu.h"
#endif

namespace librealsense
Expand All @@ -15,12 +16,15 @@ namespace librealsense
{
auto count = width * height;
#ifdef RS2_USE_CUDA
rscuda::split_frame_y16_y16_from_y12i_cuda(dest, count, reinterpret_cast<const rscuda::y12i_pixel *>(source));
#else
if (rsutils::rs2_is_gpu_available())
{
rscuda::split_frame_y16_y16_from_y12i_cuda(dest, count, reinterpret_cast<const rscuda::y12i_pixel*>(source));
return;
}
#endif
split_frame(dest, count, reinterpret_cast<const y12i_pixel*>(source),
Nir-Az marked this conversation as resolved.
Show resolved Hide resolved
[](const y12i_pixel & p) -> uint16_t { return p.l() << 6 | p.l() >> 4; }, // We want to convert 10-bit data to 16-bit data
[](const y12i_pixel & p) -> uint16_t { return p.r() << 6 | p.r() >> 4; }); // Multiply by 64 1/16 to efficiently approximate 65535/1023
#endif
}

y12i_to_y16y16::y12i_to_y16y16(int left_idx, int right_idx)
Expand Down
10 changes: 7 additions & 3 deletions src/proc/y8i-to-y8y8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#ifdef RS2_USE_CUDA
#include "cuda/cuda-conversion.cuh"
#include "rsutils/accelerators/gpu.h"
#endif

namespace librealsense
Expand All @@ -17,12 +18,15 @@ namespace librealsense
{
auto count = width * height;
#ifdef RS2_USE_CUDA
rscuda::split_frame_y8_y8_from_y8i_cuda(dest, count, reinterpret_cast<const y8i_pixel *>(source));
#else
if (rsutils::rs2_is_gpu_available())
{
rscuda::split_frame_y8_y8_from_y8i_cuda(dest, count, reinterpret_cast<const y8i_pixel*>(source));
return;
}
#endif
split_frame(dest, count, reinterpret_cast<const y8i_pixel*>(source),
Nir-Az marked this conversation as resolved.
Show resolved Hide resolved
[](const y8i_pixel & p) -> uint8_t { return p.l; },
[](const y8i_pixel & p) -> uint8_t { return p.r; });
#endif
}

y8i_to_y8y8::y8i_to_y8y8(int left_idx, int right_idx) :
Expand Down
10 changes: 10 additions & 0 deletions third-party/rsutils/include/rsutils/accelerators/gpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2025 Intel Corporation. All Rights Reserved.

#pragma once

namespace rsutils {

// Runtime check for CUDA acceleration.
// Returns true only when the library was compiled with RS2_USE_CUDA and
// cudaGetDeviceCount() succeeded and reported at least one device. Returns
// false in every other case (non-CUDA build, query failure, zero devices).
// The device count is queried on first use and cached, so later calls do not
// query the CUDA runtime again.
bool rs2_is_gpu_available();

} // namespace rsutils
45 changes: 45 additions & 0 deletions third-party/rsutils/src/rsutilgpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2025 Intel Corporation. All Rights Reserved.

#include "rsutils/accelerators/gpu.h"
#include <rsutils/easylogging/easyloggingpp.h>

#ifdef RS2_USE_CUDA
#include <cuda_runtime.h>
#endif

namespace rsutils {

// Answers "can CUDA code paths actually run on this machine?".
// The CUDA runtime is queried at most once; the result is cached for later calls.
class GPUChecker {
public:
    // Returns true when the build has RS2_USE_CUDA defined and the CUDA runtime
    // reported at least one device; false otherwise (including non-CUDA builds
    // and a failed device query).
    static bool is_gpu_available() {
        // Initialization of a function-local static is guaranteed to happen
        // exactly once and to be thread-safe (C++11), so concurrent first calls
        // cannot race on the cached count or run the query (and its logging)
        // more than once.
        static const int gpu_device_count = query_gpu_device_count();
        return gpu_device_count > 0;
    }

private:
    // Performs the one-time device query and logs the outcome.
    // Returns the number of CUDA devices, or 0 when the query fails or the
    // build has no CUDA support.
    static int query_gpu_device_count() {
#ifdef RS2_USE_CUDA
        int device_count = 0;
        const cudaError_t error = cudaGetDeviceCount(&device_count);
        if (error != cudaSuccess) {
            LOG_ERROR("cudaGetDeviceCount failed: " << cudaGetErrorString(error));
            device_count = 0; // Treat a failed query as "no GPU"
        }
        if (device_count <= 0)
        {
            LOG_INFO("Avoid CUDA execution as no NVIDIA GPU found.");
        }
        else
        {
            LOG_INFO("Found " << device_count << " NVIDIA GPU.");
        }
        return device_count;
#else
        return 0; // Built without CUDA: never report a usable GPU
#endif
    }
};

// Public entry point: true when CUDA acceleration can be used at runtime.
bool rs2_is_gpu_available() {
    return rsutils::GPUChecker::is_gpu_available();
}

} // namespace rsutils

Loading