// Queries a numeric attribute of a device. Only the declaration is visible
// here; the definition lives in another translation unit.
//   attribute: integer identifier of the attribute to query
//              (NOTE(review): presumably a cudaDeviceAttr value passed as an
//              integer -- confirm against the definition).
//   device_id: index of the device to query.
// Returns the attribute's value widened to int64_t.
int64_t get_device_attribute(int64_t attribute, int64_t device_id);

// Queries the maximum shared memory per block for the given device. Only the
// declaration is visible here.
//   device_id: index of the device to query.
// NOTE(review): the unit (presumably bytes) and whether this is the default or
// the opt-in limit are not visible from this header -- confirm against the
// definition.
int64_t get_max_shared_memory_per_block_device_attribute(int64_t device_id);
| namespace cuda_utils { | |
| template <typename T> | |
| HOST_DEVICE_INLINE constexpr std::enable_if_t<std::is_integral_v<T>, T> | |
| ceil_div(T a, T b) { | |
| return (a + b - 1) / b; | |
| } | |
| }; // namespace cuda_utils |