2023-12-14 12:35:58 -05:00
|
|
|
#pragma once
|
|
|
|
|
2025-02-12 19:51:51 -08:00
|
|
|
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <type_traits>
|
|
|
|
|
2025-02-21 18:30:12 -05:00
|
|
|
// Function-qualifier macros so one declaration can be compiled by a HIP
// compiler, a CUDA compiler, or a plain host C++ compiler. Exactly one of the
// three branches below is active in a given translation unit.
//
//   HOST_DEVICE_INLINE - for functions used from both host and device code
//   DEVICE_INLINE      - for device-side functions
//   HOST_INLINE        - for host-side functions
#if defined(__HIPCC__)
  // HIP: only the execution-space qualifiers are emitted; no inline specifier
  // is added in this branch.
  // NOTE(review): a non-template function defined in a header with these
  // macros is therefore not `inline` under HIP -- confirm this is intentional.
  #define HOST_DEVICE_INLINE __host__ __device__
  #define DEVICE_INLINE __device__
  #define HOST_INLINE __host__
#elif defined(__CUDACC__) || defined(_NVHPC_CUDA)
  // CUDA compilers: execution-space qualifiers plus __forceinline__.
  #define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
  #define DEVICE_INLINE __device__ __forceinline__
  #define HOST_INLINE __host__ __forceinline__
#else
  // Neither GPU compiler macro is defined: the execution-space qualifiers are
  // unavailable, so all three macros collapse to plain `inline`.
  #define HOST_DEVICE_INLINE inline
  #define DEVICE_INLINE inline
  #define HOST_INLINE inline
#endif
|
|
|
|
|
2025-02-12 19:51:51 -08:00
|
|
|
// Evaluates `cmd` exactly once and terminates the process if the result is
// not cudaSuccess.
//
// `cmd` must be an expression that yields a cudaError_t. On failure the macro
// prints the expansion site (__FILE__ / __LINE__) together with
// cudaGetErrorString() for the returned code to stderr, then calls
// exit(EXIT_FAILURE). On success it has no effect beyond evaluating `cmd`.
//
// Implementation notes:
//  * The do { ... } while (0) wrapper makes the expansion a single statement,
//    so `if (x) CUDA_CHECK(...); else ...` parses as expected.
//  * `cmd` is parenthesized and the temporary has a macro-specific name so an
//    argument such as `CUDA_CHECK(cudaEventSynchronize(e))` cannot end up
//    referring to the macro's own local instead of the caller's variable.
//  * The diagnostic goes to stderr so it is not mixed into (or lost with)
//    redirected program output.
#define CUDA_CHECK(cmd)                                                \
  do {                                                                 \
    const cudaError_t cuda_check_status_ = (cmd);                      \
    if (cuda_check_status_ != cudaSuccess) {                           \
      fprintf(stderr, "Failed: Cuda error %s:%d '%s'\n", __FILE__,     \
              __LINE__, cudaGetErrorString(cuda_check_status_));       \
      exit(EXIT_FAILURE);                                              \
    }                                                                  \
  } while (0)
|
|
|
|
|
2024-06-09 16:23:30 -04:00
|
|
|
// Declaration only; no definition is visible in this header.
// Takes an attribute identifier and a device identifier (both int64_t) and
// returns an int64_t.
// NOTE(review): presumably returns the value of the given device attribute
// for device `device_id`, with `attribute` carrying a cudaDeviceAttr
// enumerator value -- confirm against the definition.
int64_t get_device_attribute(int64_t attribute, int64_t device_id);
|
2023-11-23 16:31:19 -08:00
|
|
|
|
2024-06-09 16:23:30 -04:00
|
|
|
// Declaration only; no definition is visible in this header.
// Takes a device identifier (int64_t) and returns an int64_t.
// NOTE(review): judging by the name, the result is the maximum shared memory
// available per block on device `device_id`; the unit (presumably bytes) and
// whether the opt-in limit is used are not visible here -- confirm against
// the definition.
int64_t get_max_shared_memory_per_block_device_attribute(int64_t device_id);
|
2025-02-21 18:30:12 -05:00
|
|
|
|
|
|
|
namespace cuda_utils {
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
HOST_DEVICE_INLINE constexpr std::enable_if_t<std::is_integral_v<T>, T>
|
|
|
|
ceil_div(T a, T b) {
|
|
|
|
return (a + b - 1) / b;
|
|
|
|
}
|
|
|
|
|
|
|
|
}; // namespace cuda_utils
|