utils.cuh
#pragma once

#include <cuda_runtime.h>

#include <stdio.h>
#include <stdlib.h>

// Error-checking helpers. They compile away in release builds; nvcc defines
// __CUDACC_DEBUG__ when compiling with device debug info (-G).
#ifndef __CUDACC_DEBUG__
#define cudaCheckError()
#else
#define cudaCheckError()                                                      \
    {                                                                         \
        auto e = cudaGetLastError();                                          \
        if (e != cudaSuccess)                                                 \
        {                                                                     \
            printf("Cuda failure %s:%d: '%s'\n",                              \
                   __FILE__,                                                  \
                   __LINE__,                                                  \
                   cudaGetErrorString(e));                                    \
            exit(1);                                                          \
        }                                                                     \
    }
#endif
#ifndef __CUDACC_DEBUG__
#define cudaSafeCall(ans) ans
#else
#define cudaSafeCall(ans)                                                     \
    {                                                                         \
        gpuAssertDebug((ans), __FILE__, __LINE__);                            \
    }

inline void
gpuAssertDebug(cudaError_t code, const char* file, int line, bool abort = true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,
                "GPUassert: %s %s %d\n",
                cudaGetErrorString(code),
                file,
                line);
        if (abort)
            exit(code);
    }
}
#endif
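
// Example usage of the two helpers above (a minimal sketch; `buf`, `n_bytes`,
// and `my_kernel` are hypothetical names):
//
//   float* buf;
//   cudaSafeCall(cudaMalloc(&buf, n_bytes));  // checks a CUDA API call
//   my_kernel<<<blocks, threads>>>(buf);
//   cudaCheckError();                         // checks the kernel launch
//   cudaSafeCall(cudaFree(buf));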
// atomicAdd on double is only provided natively on devices of compute
// capability 6.0 (sm_60) and higher, so we supply a CAS-based fallback for
// older architectures to keep the program fully compatible.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
__device__ double atomicAdd(double* address, double val)
{
    unsigned long long int* address_as_ull = (unsigned long long int*)address;
    unsigned long long int old = *address_as_ull, assumed;
    do
    {
        assumed = old;
        old = atomicCAS(
            address_as_ull,
            assumed,
            __double_as_longlong(val + __longlong_as_double(assumed)));
        // Note: uses integer comparison to avoid hanging in case of NaN
        // (since NaN != NaN).
    } while (assumed != old);
    return __longlong_as_double(old);
}
#endif
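
// Example: summing per-thread contributions into one double total with
// atomicAdd (a minimal sketch; `values`, `total`, and `n` are hypothetical
// names):
//
//   __global__ void sum_kernel(const double* values, double* total, int n)
//   {
//       int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n)
//           atomicAdd(total, values[i]); // native on sm_60+, fallback below
//   }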
// atomicMin is only provided natively for integer types; there is no float or
// double overload, so we implement both with atomicCAS. (unsigned short is
// not usable here either: atomicCAS does not support it on all architectures,
// even though the documentation lists it.)
#ifdef __CUDA_ARCH__
__device__ float atomicMin(float* address, float val)
{
    int* address_as_i = (int*)address;
    int old = *address_as_i, assumed;
    do
    {
        assumed = old;
        old = atomicCAS(address_as_i,
                        assumed,
                        __float_as_int(fminf(val, __int_as_float(assumed))));
    } while (assumed != old);
    return __int_as_float(old);
}

__device__ double atomicMin(double* address, double val)
{
    unsigned long long int* address_as_ull = (unsigned long long int*)address;
    unsigned long long int old = *address_as_ull, assumed;
    do
    {
        assumed = old;
        old = atomicCAS(
            address_as_ull,
            assumed,
            __double_as_longlong(fmin(val, __longlong_as_double(assumed))));
        // Note: uses integer comparison to avoid hanging in case of NaN
        // (since NaN != NaN).
    } while (assumed != old);
    return __longlong_as_double(old);
}
#endif
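
// Example: finding the global minimum of an array with the float overload
// (a minimal sketch; `data`, `result`, and `n` are hypothetical names, and
// `result` should be initialized to a large value such as FLT_MAX first):
//
//   __global__ void min_kernel(const float* data, float* result, int n)
//   {
//       int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n)
//           atomicMin(result, data[i]);
//   }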
// atomicMax is likewise only provided natively for integer types, so the
// float and double overloads below follow the same atomicCAS pattern as
// atomicMin.
#ifdef __CUDA_ARCH__
__device__ float atomicMax(float* address, float val)
{
    int* address_as_i = (int*)address;
    int old = *address_as_i, assumed;
    do
    {
        assumed = old;
        old = atomicCAS(address_as_i,
                        assumed,
                        __float_as_int(fmaxf(val, __int_as_float(assumed))));
    } while (assumed != old);
    return __int_as_float(old);
}

__device__ double atomicMax(double* address, double val)
{
    unsigned long long int* address_as_ull = (unsigned long long int*)address;
    unsigned long long int old = *address_as_ull, assumed;
    do
    {
        assumed = old;
        old = atomicCAS(
            address_as_ull,
            assumed,
            __double_as_longlong(fmax(val, __longlong_as_double(assumed))));
        // Note: uses integer comparison to avoid hanging in case of NaN
        // (since NaN != NaN).
    } while (assumed != old);
    return __longlong_as_double(old);
}
#endif
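
// Example: the max counterpart of the sketch above (`data`, `result`, and `n`
// are hypothetical names; initialize `result` to -FLT_MAX or similar first):
//
//   __global__ void max_kernel(const float* data, float* result, int n)
//   {
//       int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n)
//           atomicMax(result, data[i]);
//   }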