IntelRealSense · Nir-Az · Jan 21, 2025 · Jan 9, 2025 · Jan 12, 2025 · Jan 12, 2025
diff --git a/src/linux/backend-hid.cpp b/src/linux/backend-hid.cpp
@@ -7,6 +7,7 @@
 #include "types.h"
 
 #include <rsutils/string/from.h>
+#include "rsutils/rsutilgpu.h"
 
 #include <thread>
 #include <chrono>
@@ -1232,21 +1233,29 @@ namespace librealsense
             for (auto& elem : common_sensors)
             {
                 hid_device_info hid_dev_info{};
-                if(!get_hid_device_info(elem.c_str(), hid_dev_info))
+                if (!get_hid_device_info(elem.c_str(), hid_dev_info))
                 {
 #ifdef RS2_USE_CUDA
-                    /* On the Jetson TX, ina3221x is the power monitor (I2C bus)
-                    This code is checking the IIA device directory, but tries to compare as USB HID device
-                    The ina3221x is not a HID device. Check here to avoid spamming the console.
-                    Patch suggested by JetsonHacks: https://github.com/jetsonhacks/buildLibrealsense2TX */
-                    std::string device_path_str(elem.c_str());
-                    device_path_str+="/";
-                    std::string dev_name;
-                    std::ifstream(device_path_str + "name") >> dev_name;
-                    if (dev_name != std::string("ina3221x")) {
-                        LOG_WARNING("Failed to read busnum/devnum. Device Path: " << elem);
+                    if (rsutils::rs2_is_gpu_available())
+                    {
+                        /* On the Jetson TX, ina3221x is the power monitor (I2C bus)
+                        This code is checking the IIA device directory, but tries to compare as USB HID device
+                        The ina3221x is not a HID device. Check here to avoid spamming the console.
+                        Patch suggested by JetsonHacks: https://github.com/jetsonhacks/buildLibrealsense2TX */
+                        std::string device_path_str(elem.c_str());
+                        device_path_str += "/";
+                        std::string dev_name;
+                        std::ifstream(device_path_str + "name") >> dev_name;
+                        if (dev_name != std::string("ina3221x")) {
+                            LOG_WARNING("Failed to read busnum/devnum. Device Path: " << elem);
+                        }
+                    }
+                    else
+                    {
+                        // CUDA build running without a GPU: report the failure exactly like the non-CUDA branch below
+                        LOG_INFO("Failed to read busnum/devnum. Device Path: " << elem);
                     }
 #else
                     LOG_INFO("Failed to read busnum/devnum. Device Path: " << elem);
 #endif
                     continue;

diff --git a/src/linux/backend-v4l2.cpp b/src/linux/backend-v4l2.cpp
@@ -58,6 +58,8 @@
 
 #include <sys/signalfd.h>
 #include <signal.h>
+#include "rsutils/rsutilgpu.h"
+
 #pragma GCC diagnostic ignored "-Woverflow"
 
 const size_t MAX_DEV_PARENT_DIR = 10;
@@ -755,9 +757,16 @@ namespace librealsense
             if (!is_usb_path_valid(video_path, dev_name, busnum, devnum, devpath))
             {
-#ifndef RS2_USE_CUDA
-               /* On the Jetson TX, the camera module is CSI & I2C and does not report as this code expects
-               Patch suggested by JetsonHacks: https://github.com/jetsonhacks/buildLibrealsense2TX */
-               LOG_INFO("Failed to read busnum/devnum. Device Path: " << ("/sys/class/video4linux/" + name));
+                /* On the Jetson TX, the camera module is CSI & I2C and does not report as this code expects,
+                so the message is skipped only when a CUDA build actually finds a GPU at runtime.
+                Patch suggested by JetsonHacks: https://github.com/jetsonhacks/buildLibrealsense2TX */
+#ifdef RS2_USE_CUDA
+                const bool gpu_in_use = rsutils::rs2_is_gpu_available();
+#else
+                const bool gpu_in_use = false;
 #endif
+                if (!gpu_in_use)
+                {
+                    LOG_INFO("Failed to read busnum/devnum. Device Path: " << ("/sys/class/video4linux/" + name));
+                }
                throw linux_backend_exception("Failed to read busnum/devnum of usb device");
             }

diff --git a/src/proc/align.cpp b/src/proc/align.cpp
@@ -13,7 +13,9 @@
 
 #if defined(RS2_USE_CUDA)
 #include "proc/cuda/cuda-align.h"
-#elif defined(__SSSE3__)
+#include "rsutils/rsutilgpu.h"
+#endif
+#if defined(__SSSE3__)
 #include "proc/sse/sse-align.h"
 #endif
 #include "proc/neon/neon-align.h"
@@ -24,15 +26,19 @@ namespace librealsense
 
     std::shared_ptr<align> align::create_align(rs2_stream align_to)
     {
-        #if defined(RS2_USE_CUDA)
+#ifdef RS2_USE_CUDA
+        // CUDA build: take the GPU implementation only when a GPU is reported at runtime
+        if (rsutils::rs2_is_gpu_available())
             return std::make_shared<librealsense::align_cuda>(align_to);
-        #elif defined(__SSSE3__)
-            return std::make_shared<librealsense::align_sse>(align_to);
-        #elif defined(__ARM_NEON) && ! defined(ANDROID)
-            return std::make_shared<librealsense::align_neon>(align_to);
-        #else
-            return std::make_shared<librealsense::align>(align_to);
-        #endif
+#endif
+        // CPU implementations, picked at compile time: SSSE3, then NEON (not on Android), then the generic one
+#ifdef __SSSE3__
+        return std::make_shared<librealsense::align_sse>(align_to);
+#elif defined(__ARM_NEON) && !defined(ANDROID)
+        return std::make_shared<librealsense::align_neon>(align_to);
+#else
+        return std::make_shared<librealsense::align>(align_to);
+#endif
     }
 
     template<class GET_DEPTH, class TRANSFER_PIXEL>

diff --git a/src/proc/color-formats-converter.cpp b/src/proc/color-formats-converter.cpp
@@ -13,12 +13,14 @@
 
 #ifdef RS2_USE_CUDA
 #include "cuda/cuda-conversion.cuh"
+#include "rsutils/rsutilgpu.h"
 #endif
 #ifdef __SSSE3__
 #include <tmmintrin.h> // For SSSE3 intrinsics
 #endif
 #include "neon/image-neon.h"
 
+
 #if defined (ANDROID) || (defined (__linux__) && !defined (__x86_64__)) || (defined (__APPLE__) && !defined (__x86_64__))
 
 bool has_avx() { return false; }
@@ -45,20 +47,23 @@ bool has_avx()
 
 #endif
 
-namespace librealsense 
+namespace librealsense
 {
     /////////////////////////////
     // YUY2 unpacking routines //
     /////////////////////////////
     // This templated function unpacks YUY2 into Y8/Y16/RGB8/RGBA8/BGR8/BGRA8, depending on the compile-time parameter FORMAT.
     // It is expected that all branching outside of the loop control variable will be removed due to constant-folding.
-    template<rs2_format FORMAT> void unpack_yuy2( uint8_t * const d[], const uint8_t * s, int width, int height, int actual_size)
+    template<rs2_format FORMAT> void unpack_yuy2(uint8_t* const d[], const uint8_t* s, int width, int height, int actual_size)
     {
         auto n = width * height;
         assert(n % 16 == 0); // All currently supported color resolutions are multiples of 16 pixels. Could easily extend support to other resolutions by copying final n<16 pixels into a zero-padded buffer and recursively calling self for final iteration.
 #ifdef RS2_USE_CUDA
-        rscuda::unpack_yuy2_cuda<FORMAT>(d, s, n);
-        return;
+        if (rsutils::rs2_is_gpu_available())
+        {
+            rscuda::unpack_yuy2_cuda<FORMAT>(d, s, n);
+            return;
+        }
 #endif
 #if defined __SSSE3__ && ! defined ANDROID
         static bool do_avx = has_avx();

diff --git a/src/proc/depth-formats-converter.cpp b/src/proc/depth-formats-converter.cpp
@@ -7,36 +7,51 @@
 
 #ifdef RS2_USE_CUDA
 #include "cuda/cuda-conversion.cuh"
+#include "rsutils/rsutilgpu.h"
 #endif
 
 namespace librealsense
 {
-    void unpack_z16_y8_from_sr300_inzi( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size)
+    // Splits an INZI frame: the first width*height 16-bit samples are reduced to 8 bits (>> 2) into dest[1],
+    // the following width*height 16-bit samples are copied unchanged into dest[0].
+    void unpack_z16_y8_from_sr300_inzi(uint8_t* const dest[], const uint8_t* source, int width, int height, int actual_size)
     {
         auto count = width * height;
         auto in = reinterpret_cast<const uint16_t*>(source);
-        auto out_ir = reinterpret_cast<uint8_t *>(dest[1]);
+        auto out_ir = reinterpret_cast<uint8_t*>(dest[1]);
 #ifdef RS2_USE_CUDA
-        rscuda::unpack_z16_y8_from_sr300_inzi_cuda(out_ir, in, count);
-        in += count;
-#else
+        // A CUDA build can still run on a machine without a GPU, so the kernel is used only when one is available
+        if (rsutils::rs2_is_gpu_available())
+        {
+            rscuda::unpack_z16_y8_from_sr300_inzi_cuda(out_ir, in, count);
+            std::memcpy(dest[0], in + count, count * 2);
+            return;
+        }
+#endif
+        // CPU path (non-CUDA build, or no GPU at runtime); the loop leaves `in` at the start of the second half
         for (int i = 0; i < count; ++i) *out_ir++ = *in++ >> 2;
-#endif
-        std::memcpy( dest[0], in, count * 2 );
+        std::memcpy(dest[0], in, count * 2);
     }
 
-    void unpack_z16_y16_from_sr300_inzi( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size)
+    // Splits an INZI frame: the first width*height 16-bit samples are shifted left by 6 into dest[1],
+    // the following width*height 16-bit samples are copied unchanged into dest[0].
+    void unpack_z16_y16_from_sr300_inzi(uint8_t* const dest[], const uint8_t* source, int width, int height, int actual_size)
     {
         auto count = width * height;
         auto in = reinterpret_cast<const uint16_t*>(source);
         auto out_ir = reinterpret_cast<uint16_t*>(dest[1]);
 #ifdef RS2_USE_CUDA
-        rscuda::unpack_z16_y16_from_sr300_inzi_cuda(out_ir, in, count);
-        in += count;
-#else
+        // A CUDA build can still run on a machine without a GPU, so the kernel is used only when one is available
+        if (rsutils::rs2_is_gpu_available())
+        {
+            rscuda::unpack_z16_y16_from_sr300_inzi_cuda(out_ir, in, count);
+            std::memcpy(dest[0], in + count, count * 2);
+            return;
+        }
+#endif
+        // CPU path (non-CUDA build, or no GPU at runtime); the loop leaves `in` at the start of the second half
         for (int i = 0; i < count; ++i) *out_ir++ = *in++ << 6;
-#endif
-        std::memcpy( dest[0], in, count * 2 );
+        std::memcpy(dest[0], in, count * 2);
     }
 
     void unpack_inzi(rs2_format dst_ir_format, uint8_t * const d[], const uint8_t * s, int width, int height, int actual_size)

diff --git a/src/proc/pointcloud.cpp b/src/proc/pointcloud.cpp
@@ -23,6 +23,7 @@
 #include "proc/sse/sse-pointcloud.h"
 #endif
 #include "proc/neon/neon-pointcloud.h"
+#include "rsutils/rsutilgpu.h"
 
 
 namespace librealsense
@@ -395,15 +396,19 @@ namespace librealsense
 
     std::shared_ptr<pointcloud> pointcloud::create()
     {
-        #ifdef RS2_USE_CUDA
+#if defined(RS2_USE_CUDA)
+        // CUDA build: take the GPU implementation only when a GPU is reported at runtime
+        if (rsutils::rs2_is_gpu_available())
             return std::make_shared<librealsense::pointcloud_cuda>();
-        #elif defined(__SSSE3__)
-            return std::make_shared<librealsense::pointcloud_sse>();
-        #elif defined(__ARM_NEON)  && ! defined ANDROID
-            return std::make_shared<librealsense::pointcloud_neon>();
-        #else
-            return std::make_shared<librealsense::pointcloud>();
-        #endif
+#endif
+        // CPU implementations, picked at compile time: SSSE3, then NEON (not on Android), then the generic one
+#if defined(__SSSE3__)
+        return std::make_shared<librealsense::pointcloud_sse>();
+#elif defined(__ARM_NEON) && !defined(ANDROID)
+        return std::make_shared<librealsense::pointcloud_neon>();
+#else
+        return std::make_shared<librealsense::pointcloud>();
+#endif
     }
 
     bool pointcloud::run__occlusion_filter(const rs2_extrinsics& extr)

diff --git a/src/proc/y12i-to-y16y16-mipi.cpp b/src/proc/y12i-to-y16y16-mipi.cpp
@@ -5,19 +5,32 @@
 #include "stream.h"
 #ifdef RS2_USE_CUDA
 #include "cuda/cuda-conversion.cuh"
+#include "rsutils/rsutilgpu.h"
 #endif
 
 namespace librealsense
 {
-//D457 dev - padding of 8 bits added after each bits, should be removed after it is corrected in SerDes
+    //D457 dev - padding of 8 bits added after each bits, should be removed after it is corrected in SerDes
     struct y12i_pixel_mipi { uint8_t rl : 8, rh : 4, ll : 4, lh : 8, padding : 8; int l() const { return lh << 4 | ll; } int r() const { return rh << 8 | rl; } };
 
-    void unpack_y16_y16_from_y12i_10_mipi( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size)
+    // Splits width*height interleaved y12i_pixel_mipi samples from `source` into the 16-bit outputs in `dest`,
+    // widening each 10-bit value to 16 bits.
+    void unpack_y16_y16_from_y12i_10_mipi(uint8_t* const dest[], const uint8_t* source, int width, int height, int actual_size)
     {
         auto count = width * height;
 #ifdef RS2_USE_CUDA
-        rscuda::split_frame_y16_y16_from_y12i_cuda(dest, count, reinterpret_cast<const rscuda::y12i_pixel_mipi *>(source));
+        if (rsutils::rs2_is_gpu_available())
+        {
+            rscuda::split_frame_y16_y16_from_y12i_cuda(dest, count, reinterpret_cast<const rscuda::y12i_pixel_mipi*>(source));
+        }
+        else
+        {
+            // CUDA build without a GPU at runtime: run the same CPU conversion as the non-CUDA branch below
+            split_frame(dest, count, reinterpret_cast<const y12i_pixel_mipi*>(source),
+                [](const y12i_pixel_mipi& p) -> uint16_t { return p.l() << 6 | p.l() >> 4; },
+                [](const y12i_pixel_mipi& p) -> uint16_t { return p.r() << 6 | p.r() >> 4; });
+        }
 #else
         split_frame(dest, count, reinterpret_cast<const y12i_pixel_mipi*>(source),
             [](const y12i_pixel_mipi& p) -> uint16_t { return p.l() << 6 | p.l() >> 4; },  // We want to convert 10-bit data to 16-bit data
             [](const y12i_pixel_mipi& p) -> uint16_t { return p.r() << 6 | p.r() >> 4; }); // Multiply by 64 1/16 to efficiently approximate 65535/1023
@@ -27,12 +40,12 @@ namespace librealsense
     y12i_to_y16y16_mipi::y12i_to_y16y16_mipi(int left_idx, int right_idx)
         : y12i_to_y16y16_mipi("Y12I to Y16L Y16R Transform", left_idx, right_idx) {}
 
-    y12i_to_y16y16_mipi::y12i_to_y16y16_mipi(const char * name, int left_idx, int right_idx)
+    y12i_to_y16y16_mipi::y12i_to_y16y16_mipi(const char* name, int left_idx, int right_idx)
         : interleaved_functional_processing_block(name, RS2_FORMAT_Y12I, RS2_FORMAT_Y16, RS2_STREAM_INFRARED, RS2_EXTENSION_VIDEO_FRAME, 1,
-                                                                         RS2_FORMAT_Y16, RS2_STREAM_INFRARED, RS2_EXTENSION_VIDEO_FRAME, 2)
+            RS2_FORMAT_Y16, RS2_STREAM_INFRARED, RS2_EXTENSION_VIDEO_FRAME, 2)
     {}
 
-    void y12i_to_y16y16_mipi::process_function( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size, int input_size)
+    void y12i_to_y16y16_mipi::process_function(uint8_t* const dest[], const uint8_t* source, int width, int height, int actual_size, int input_size)
     {
         unpack_y16_y16_from_y12i_10_mipi(dest, source, width, height, actual_size);
     }

diff --git a/src/proc/y12i-to-y16y16.cpp b/src/proc/y12i-to-y16y16.cpp
@@ -5,33 +5,41 @@
 #include "stream.h"
 #ifdef RS2_USE_CUDA
 #include "cuda/cuda-conversion.cuh"
+#include "rsutils/rsutilgpu.h"
 #endif
 
 namespace librealsense
 {
     struct y12i_pixel { uint8_t rl : 8, rh : 4, ll : 4, lh : 8; int l() const { return lh << 4 | ll; } int r() const { return rh << 8 | rl; } };
 
-    void unpack_y16_y16_from_y12i_10( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size)
+    // Splits width*height interleaved y12i_pixel samples from `source` into the 16-bit outputs in `dest`,
+    // widening each 10-bit value to 16 bits.
+    void unpack_y16_y16_from_y12i_10(uint8_t* const dest[], const uint8_t* source, int width, int height, int actual_size)
     {
         auto count = width * height;
 #ifdef RS2_USE_CUDA
-        rscuda::split_frame_y16_y16_from_y12i_cuda(dest, count, reinterpret_cast<const rscuda::y12i_pixel *>(source));
-#else
+        // A CUDA build can still run on a machine without a GPU, so the kernel is used only when one is available
+        if (rsutils::rs2_is_gpu_available())
+        {
+            rscuda::split_frame_y16_y16_from_y12i_cuda(dest, count, reinterpret_cast<const rscuda::y12i_pixel*>(source));
+            return;
+        }
+#endif
+        // CPU path: non-CUDA build, or no GPU at runtime
         split_frame(dest, count, reinterpret_cast<const y12i_pixel*>(source),
-            [](const y12i_pixel & p) -> uint16_t { return p.l() << 6 | p.l() >> 4; },  // We want to convert 10-bit data to 16-bit data
-            [](const y12i_pixel & p) -> uint16_t { return p.r() << 6 | p.r() >> 4; }); // Multiply by 64 1/16 to efficiently approximate 65535/1023
+            [](const y12i_pixel& p) -> uint16_t { return p.l() << 6 | p.l() >> 4; },  // We want to convert 10-bit data to 16-bit data
+            [](const y12i_pixel& p) -> uint16_t { return p.r() << 6 | p.r() >> 4; }); // Multiply by 64 1/16 to efficiently approximate 65535/1023
-#endif
     }
 
     y12i_to_y16y16::y12i_to_y16y16(int left_idx, int right_idx)
         : y12i_to_y16y16("Y12I to Y16L Y16R Transform", left_idx, right_idx) {}
 
-    y12i_to_y16y16::y12i_to_y16y16(const char * name, int left_idx, int right_idx)
+    y12i_to_y16y16::y12i_to_y16y16(const char* name, int left_idx, int right_idx)
         : interleaved_functional_processing_block(name, RS2_FORMAT_Y12I, RS2_FORMAT_Y16, RS2_STREAM_INFRARED, RS2_EXTENSION_VIDEO_FRAME, 1,
-                                                                         RS2_FORMAT_Y16, RS2_STREAM_INFRARED, RS2_EXTENSION_VIDEO_FRAME, 2)
+            RS2_FORMAT_Y16, RS2_STREAM_INFRARED, RS2_EXTENSION_VIDEO_FRAME, 2)
     {}
 
-    void y12i_to_y16y16::process_function( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size, int input_size)
+    void y12i_to_y16y16::process_function(uint8_t* const dest[], const uint8_t* source, int width, int height, int actual_size, int input_size)
     {
         unpack_y16_y16_from_y12i_10(dest, source, width, height, actual_size);
     }

diff --git a/src/proc/y8i-to-y8y8.cpp b/src/proc/y8i-to-y8y8.cpp
@@ -8,17 +8,30 @@
 
 #ifdef RS2_USE_CUDA
 #include "cuda/cuda-conversion.cuh"
+#include "rsutils/rsutilgpu.h"
 #endif
 
 namespace librealsense
 {
     struct y8i_pixel { uint8_t l, r; };
-    void unpack_y8_y8_from_y8i( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size)
+    // De-interleaves width*height y8i_pixel samples from `source` into the outputs in `dest`:
+    // one output receives each pixel's `l` byte, the other its `r` byte.
+    void unpack_y8_y8_from_y8i(uint8_t* const dest[], const uint8_t* source, int width, int height, int actual_size)
     {
         auto count = width * height;
 #ifdef RS2_USE_CUDA
-        rscuda::split_frame_y8_y8_from_y8i_cuda(dest, count, reinterpret_cast<const y8i_pixel *>(source));
+        if (rsutils::rs2_is_gpu_available())
+        {
+            rscuda::split_frame_y8_y8_from_y8i_cuda(dest, count, reinterpret_cast<const y8i_pixel*>(source));
+        }
+        else
+        {
+            // CUDA build without a GPU at runtime: run the same CPU split as the non-CUDA branch below
+            split_frame(dest, count, reinterpret_cast<const y8i_pixel*>(source),
+                [](const y8i_pixel & p) -> uint8_t { return p.l; },
+                [](const y8i_pixel & p) -> uint8_t { return p.r; });
+        }
 #else
         split_frame(dest, count, reinterpret_cast<const y8i_pixel*>(source),
             [](const y8i_pixel & p) -> uint8_t { return p.l; },
             [](const y8i_pixel & p) -> uint8_t { return p.r; });

diff --git a/third-party/rsutils/include/rsutils/rsutilgpu.h b/third-party/rsutils/include/rsutils/rsutilgpu.h
@@ -0,0 +1,18 @@
+// License: Apache 2.0. See LICENSE file in root directory.
+// Copyright(c) 2023 Intel Corporation. All Rights Reserved.
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    namespace rsutils {
+        // Runtime GPU query; callers use the result to choose between the CUDA and the CPU code path.
+        // NOTE(review): the enclosing extern "C" block gives this namespaced function C language linkage - confirm this is intended.
+        bool rs2_is_gpu_available();
+    }  // namespace rsutils
+
+#ifdef __cplusplus
+}
+#endif