Kokkos Core Kernels Package Version of the Day
Kokkos_Cuda.hpp
1/*
2//@HEADER
3// ************************************************************************
4//
5// Kokkos v. 3.0
6// Copyright (2020) National Technology & Engineering
7// Solutions of Sandia, LLC (NTESS).
8//
9// Under the terms of Contract DE-NA0003525 with NTESS,
10// the U.S. Government retains certain rights in this software.
11//
12// Redistribution and use in source and binary forms, with or without
13// modification, are permitted provided that the following conditions are
14// met:
15//
16// 1. Redistributions of source code must retain the above copyright
17// notice, this list of conditions and the following disclaimer.
18//
19// 2. Redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution.
22//
23// 3. Neither the name of the Corporation nor the names of the
24// contributors may be used to endorse or promote products derived from
25// this software without specific prior written permission.
26//
27// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38//
39// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40//
41// ************************************************************************
42//@HEADER
43*/
44
45#ifndef KOKKOS_CUDA_HPP
46#define KOKKOS_CUDA_HPP
47
48#include <Kokkos_Macros.hpp>
49#if defined(KOKKOS_ENABLE_CUDA)
50
51#include <Kokkos_Core_fwd.hpp>
52
53#include <iosfwd>
54#include <vector>
55
56#include <impl/Kokkos_AnalyzePolicy.hpp>
57#include <Kokkos_CudaSpace.hpp>
58
59#include <Kokkos_Parallel.hpp>
60#include <Kokkos_TaskScheduler.hpp>
61#include <Kokkos_Layout.hpp>
62#include <Kokkos_ScratchSpace.hpp>
63#include <Kokkos_MemoryTraits.hpp>
64#include <impl/Kokkos_Tags.hpp>
65#include <impl/Kokkos_ExecSpaceInitializer.hpp>
66#include <impl/Kokkos_HostSharedPtr.hpp>
67
68/*--------------------------------------------------------------------------*/
69
// Forward declarations of implementation types. Only the names are
// introduced here; the definitions are not part of this header.
namespace Kokkos {
namespace Impl {

class CudaExec;
class CudaInternal;

}  // namespace Impl
}  // namespace Kokkos
76
77/*--------------------------------------------------------------------------*/
78
79namespace Kokkos {
80
81namespace Impl {
namespace Experimental {

/// Bit-flag enumeration of CUDA launch mechanisms.
///
/// The non-default enumerators occupy distinct bits (1, 2, 4) so that they
/// can be combined and tested with the bitwise operators declared below.
enum class CudaLaunchMechanism : unsigned {
  Default        = 0x0,
  ConstantMemory = 0x1,
  GlobalMemory   = 0x2,
  LocalMemory    = 0x4
};

/// Bitwise OR of two launch-mechanism flag sets.
///
/// Both operands are converted to `unsigned`, OR-ed, and the result is
/// converted back to the enumeration type.
constexpr CudaLaunchMechanism operator|(CudaLaunchMechanism lhs,
                                        CudaLaunchMechanism rhs) {
  return static_cast<CudaLaunchMechanism>(static_cast<unsigned>(lhs) |
                                          static_cast<unsigned>(rhs));
}

/// Bitwise AND of two launch-mechanism flag sets.
///
/// Both operands are converted to `unsigned`, AND-ed, and the result is
/// converted back to the enumeration type.
constexpr CudaLaunchMechanism operator&(CudaLaunchMechanism lhs,
                                        CudaLaunchMechanism rhs) {
  return static_cast<CudaLaunchMechanism>(static_cast<unsigned>(lhs) &
                                          static_cast<unsigned>(rhs));
}

/// Carries a launch mechanism both as a template argument and as a data
/// member initialised from that argument.
template <CudaLaunchMechanism l>
struct CudaDispatchProperties {
  CudaLaunchMechanism launch_mechanism = l;
};

}  // namespace Experimental
106} // namespace Impl
117class Cuda {
118 public:
120
121
123 using execution_space = Cuda;
124
125#if defined(KOKKOS_ENABLE_CUDA_UVM)
127 using memory_space = CudaUVMSpace;
128#else
130 using memory_space = CudaSpace;
131#endif
132
134 using device_type = Kokkos::Device<execution_space, memory_space>;
135
137 using size_type = memory_space::size_type;
138
140 using array_layout = LayoutLeft;
141
143 using scratch_memory_space = ScratchMemorySpace<Cuda>;
144
146 //--------------------------------------------------
148
149
152 KOKKOS_INLINE_FUNCTION static int in_parallel() {
153#if defined(__CUDA_ARCH__)
154 return true;
155#else
156 return false;
157#endif
158 }
159
171 static bool sleep();
172
178 static bool wake();
179
186 static void impl_static_fence();
187
188 void fence() const;
189
191 static int concurrency();
192
194 static void print_configuration(std::ostream&, const bool detail = false);
195
197 //--------------------------------------------------
199
200 Cuda();
201
202 Cuda(cudaStream_t stream);
203
204 //--------------------------------------------------------------------------
206
207
208 struct SelectDevice {
209 int cuda_device_id;
210 SelectDevice() : cuda_device_id(0) {}
211 explicit SelectDevice(int id) : cuda_device_id(id) {}
212 };
213
215 static void impl_finalize();
216
218 static int impl_is_initialized();
219
221 static void impl_initialize(const SelectDevice = SelectDevice(),
222 const size_t num_instances = 1);
223
227 static size_type device_arch();
228
230 static size_type detect_device_count();
231
235 static std::vector<unsigned> detect_device_arch();
236
237 cudaStream_t cuda_stream() const;
238 int cuda_device() const;
239 const cudaDeviceProp& cuda_device_prop() const;
240
242 //--------------------------------------------------------------------------
243
244 static const char* name();
245
246 inline Impl::CudaInternal* impl_internal_space_instance() const {
247 return m_space_instance.get();
248 }
249 uint32_t impl_instance_id() const noexcept { return 0; }
250
251 private:
252 Kokkos::Impl::HostSharedPtr<Impl::CudaInternal> m_space_instance;
253};
254
255namespace Tools {
256namespace Experimental {
257template <>
258struct DeviceTypeTraits<Cuda> {
260 static constexpr DeviceType id = DeviceType::Cuda;
261};
262} // namespace Experimental
263} // namespace Tools
264
265namespace Impl {
266
267class CudaSpaceInitializer : public ExecSpaceInitializerBase {
268 public:
269 CudaSpaceInitializer() = default;
270 ~CudaSpaceInitializer() = default;
271 void initialize(const InitArguments& args) final;
272 void finalize(const bool all_spaces) final;
273 void fence() final;
274 void print_configuration(std::ostream& msg, const bool detail) final;
275};
276
277} // namespace Impl
278} // namespace Kokkos
279
280/*--------------------------------------------------------------------------*/
281/*--------------------------------------------------------------------------*/
282
283namespace Kokkos {
284namespace Impl {
285
286template <>
287struct MemorySpaceAccess<Kokkos::CudaSpace,
288 Kokkos::Cuda::scratch_memory_space> {
289 enum : bool { assignable = false };
290 enum : bool { accessible = true };
291 enum : bool { deepcopy = false };
292};
293
294#if defined(KOKKOS_ENABLE_CUDA_UVM)
295
// If forcing use of UVM everywhere,
// then we must assume that CudaUVMSpace
// can be a stand-in for CudaSpace.
// This will fail when a strange host-side execution space
// defines CudaUVMSpace as its preferred memory space.
301
302template <>
303struct MemorySpaceAccess<Kokkos::CudaUVMSpace,
304 Kokkos::Cuda::scratch_memory_space> {
305 enum : bool { assignable = false };
306 enum : bool { accessible = true };
307 enum : bool { deepcopy = false };
308};
309
310#endif
311
312} // namespace Impl
313} // namespace Kokkos
314
315#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
316#endif /* #ifndef KOKKOS_CUDA_HPP */
Declaration of various MemoryLayout options.
Declaration of parallel operators.