17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22#ifndef KOKKOS_CUDA_HPP
23#define KOKKOS_CUDA_HPP
25#include <Kokkos_Macros.hpp>
26#if defined(KOKKOS_ENABLE_CUDA)
28#include <Kokkos_Core_fwd.hpp>
33#include <impl/Kokkos_AnalyzePolicy.hpp>
34#include <Kokkos_CudaSpace.hpp>
35#include <Cuda/Kokkos_Cuda_Error.hpp>
38#include <Kokkos_TaskScheduler.hpp>
40#include <Kokkos_ScratchSpace.hpp>
41#include <Kokkos_MemoryTraits.hpp>
42#include <impl/Kokkos_HostSharedPtr.hpp>
43#include <impl/Kokkos_InitializationSettings.hpp>
60enum class CudaLaunchMechanism :
unsigned {
67constexpr inline CudaLaunchMechanism operator|(CudaLaunchMechanism p1,
68 CudaLaunchMechanism p2) {
69 return static_cast<CudaLaunchMechanism
>(
static_cast<unsigned>(p1) |
70 static_cast<unsigned>(p2));
72constexpr inline CudaLaunchMechanism operator&(CudaLaunchMechanism p1,
73 CudaLaunchMechanism p2) {
74 return static_cast<CudaLaunchMechanism
>(
static_cast<unsigned>(p1) &
75 static_cast<unsigned>(p2));
78template <CudaLaunchMechanism l>
79struct CudaDispatchProperties {
80 CudaLaunchMechanism launch_mechanism = l;
100 using execution_space = Cuda;
102#if defined(KOKKOS_ENABLE_CUDA_UVM)
104 using memory_space = CudaUVMSpace;
107 using memory_space = CudaSpace;
114 using size_type = memory_space::size_type;
117 using array_layout = LayoutLeft;
120 using scratch_memory_space = ScratchMemorySpace<Cuda>;
129 KOKKOS_INLINE_FUNCTION
static int in_parallel() {
130#if defined(__CUDA_ARCH__)
163 static void impl_static_fence(
const std::string& name);
165 void fence(
const std::string& name =
166 "Kokkos::Cuda::fence(): Unnamed Instance Fence")
const;
169#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
170 static int concurrency();
172 int concurrency()
const;
176 void print_configuration(std::ostream& os,
bool verbose =
false)
const;
184 Cuda(cudaStream_t stream,
bool manage_stream =
false);
188 static void impl_finalize();
191 static int impl_is_initialized();
194 static void impl_initialize(InitializationSettings
const&);
199 static size_type device_arch();
202 static size_type detect_device_count();
207 static std::vector<unsigned> detect_device_arch();
209 cudaStream_t cuda_stream()
const;
210 int cuda_device()
const;
211 const cudaDeviceProp& cuda_device_prop()
const;
216 static const char* name();
218 inline Impl::CudaInternal* impl_internal_space_instance()
const {
219 return m_space_instance.get();
221 uint32_t impl_instance_id() const noexcept;
224 friend
bool operator==(Cuda const& lhs, Cuda const& rhs) {
225 return lhs.impl_internal_space_instance() ==
226 rhs.impl_internal_space_instance();
228 friend bool operator!=(Cuda
const& lhs, Cuda
const& rhs) {
229 return !(lhs == rhs);
231 Kokkos::Impl::HostSharedPtr<Impl::CudaInternal> m_space_instance;
237struct DeviceTypeTraits<Cuda> {
239 static constexpr DeviceType
id = DeviceType::Cuda;
240 static int device_id(
const Cuda& exec) {
return exec.cuda_device(); }
247template <
class DT,
class... DP>
248struct ZeroMemset<Kokkos::Cuda, DT, DP...> {
250 const View<DT, DP...>& dst,
251 typename View<DT, DP...>::const_value_type&) {
252 KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemsetAsync(
254 dst.size() *
sizeof(
typename View<DT, DP...>::value_type),
255 exec_space_instance.cuda_stream()));
258 ZeroMemset(
const View<DT, DP...>& dst,
259 typename View<DT, DP...>::const_value_type&) {
260 KOKKOS_IMPL_CUDA_SAFE_CALL(
261 cudaMemset(dst.data(), 0,
262 dst.size() *
sizeof(
typename View<DT, DP...>::value_type)));
275struct MemorySpaceAccess<Kokkos::CudaSpace,
276 Kokkos::Cuda::scratch_memory_space> {
277 enum :
bool { assignable =
false };
278 enum :
bool { accessible =
true };
279 enum :
bool { deepcopy =
false };
282#if defined(KOKKOS_ENABLE_CUDA_UVM)
291struct MemorySpaceAccess<Kokkos::CudaUVMSpace,
292 Kokkos::Cuda::scratch_memory_space> {
293 enum :
bool { assignable =
false };
294 enum :
bool { accessible =
true };
295 enum :
bool { deepcopy =
false };
Declaration of various MemoryLayout options.
Declaration of parallel operators.
A thread safe view to a bitset.