// Kokkos Core Kernels Package: Version of the Day
// Kokkos_CudaSpace.hpp
1/*
2//@HEADER
3// ************************************************************************
4//
5// Kokkos v. 3.0
6// Copyright (2020) National Technology & Engineering
7// Solutions of Sandia, LLC (NTESS).
8//
9// Under the terms of Contract DE-NA0003525 with NTESS,
10// the U.S. Government retains certain rights in this software.
11//
12// Redistribution and use in source and binary forms, with or without
13// modification, are permitted provided that the following conditions are
14// met:
15//
16// 1. Redistributions of source code must retain the above copyright
17// notice, this list of conditions and the following disclaimer.
18//
19// 2. Redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution.
22//
23// 3. Neither the name of the Corporation nor the names of the
24// contributors may be used to endorse or promote products derived from
25// this software without specific prior written permission.
26//
27// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38//
39// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40//
41// ************************************************************************
42//@HEADER
43*/
44
45#ifndef KOKKOS_CUDASPACE_HPP
46#define KOKKOS_CUDASPACE_HPP
47
48#include <Kokkos_Macros.hpp>
49#if defined(KOKKOS_ENABLE_CUDA)
50
51#include <Kokkos_Core_fwd.hpp>
52
53#include <iosfwd>
54#include <typeinfo>
55#include <string>
56#include <memory>
57
58#include <Kokkos_HostSpace.hpp>
59#include <impl/Kokkos_SharedAlloc.hpp>
60
61#include <impl/Kokkos_Profiling_Interface.hpp>
62
63#include <Cuda/Kokkos_Cuda_abort.hpp>
64
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
// Debug-only runtime toggle consumed by CudaUVMSpace (see
// cuda_pin_uvm_to_host() below).  Declared with C linkage so the symbols are
// reachable without C++ name mangling.
extern "C" bool kokkos_impl_cuda_pin_uvm_to_host();
extern "C" void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
#endif
69
70/*--------------------------------------------------------------------------*/
71
72namespace Kokkos {
73
76class CudaSpace {
77 public:
79 using memory_space = CudaSpace;
80 using execution_space = Kokkos::Cuda;
81 using device_type = Kokkos::Device<execution_space, memory_space>;
82
83 using size_type = unsigned int;
84
85 /*--------------------------------*/
86
87 CudaSpace();
88 CudaSpace(CudaSpace&& rhs) = default;
89 CudaSpace(const CudaSpace& rhs) = default;
90 CudaSpace& operator=(CudaSpace&& rhs) = default;
91 CudaSpace& operator=(const CudaSpace& rhs) = default;
92 ~CudaSpace() = default;
93
95 void* allocate(const size_t arg_alloc_size) const;
96 void* allocate(const char* arg_label, const size_t arg_alloc_size,
97 const size_t arg_logical_size = 0) const;
98
100 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
101 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
102 const size_t arg_alloc_size,
103 const size_t arg_logical_size = 0) const;
104
105 private:
106 template <class, class, class, class>
108 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
109 const size_t arg_logical_size = 0,
110 const Kokkos::Tools::SpaceHandle =
111 Kokkos::Tools::make_space_handle(name())) const;
112 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
113 const size_t arg_alloc_size,
114 const size_t arg_logical_size = 0,
115 const Kokkos::Tools::SpaceHandle =
116 Kokkos::Tools::make_space_handle(name())) const;
117
118 public:
120 static constexpr const char* name() { return m_name; }
121
122 /*--------------------------------*/
124 KOKKOS_DEPRECATED static void access_error();
125 KOKKOS_DEPRECATED static void access_error(const void* const);
126
127 private:
128 int m_device;
129
130 static constexpr const char* m_name = "Cuda";
131 friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
132};
133} // namespace Kokkos
134
135/*--------------------------------------------------------------------------*/
136/*--------------------------------------------------------------------------*/
137
138namespace Kokkos {
139
143class CudaUVMSpace {
144 public:
146 using memory_space = CudaUVMSpace;
147 using execution_space = Cuda;
148 using device_type = Kokkos::Device<execution_space, memory_space>;
149 using size_type = unsigned int;
150
152 static bool available();
153
154 /*--------------------------------*/
156 KOKKOS_DEPRECATED static int number_of_allocations();
157
158 /*--------------------------------*/
159
160 /*--------------------------------*/
161
162 CudaUVMSpace();
163 CudaUVMSpace(CudaUVMSpace&& rhs) = default;
164 CudaUVMSpace(const CudaUVMSpace& rhs) = default;
165 CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
166 CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
167 ~CudaUVMSpace() = default;
168
170 void* allocate(const size_t arg_alloc_size) const;
171 void* allocate(const char* arg_label, const size_t arg_alloc_size,
172 const size_t arg_logical_size = 0) const;
173
175 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
176 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
177 const size_t arg_alloc_size,
178 const size_t arg_logical_size = 0) const;
179
180 private:
181 template <class, class, class, class>
183 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
184 const size_t arg_logical_size = 0,
185 const Kokkos::Tools::SpaceHandle =
186 Kokkos::Tools::make_space_handle(name())) const;
187 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
188 const size_t arg_alloc_size,
189 const size_t arg_logical_size = 0,
190 const Kokkos::Tools::SpaceHandle =
191 Kokkos::Tools::make_space_handle(name())) const;
192
193 public:
195 static constexpr const char* name() { return m_name; }
196
197#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
198 static bool cuda_pin_uvm_to_host();
199 static void cuda_set_pin_uvm_to_host(bool val);
200#endif
201 /*--------------------------------*/
202
203 private:
204 int m_device;
205
206#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
207 static bool kokkos_impl_cuda_pin_uvm_to_host_v;
208#endif
209 static constexpr const char* m_name = "CudaUVM";
210};
211
212} // namespace Kokkos
213
214/*--------------------------------------------------------------------------*/
215/*--------------------------------------------------------------------------*/
216
217namespace Kokkos {
218
222class CudaHostPinnedSpace {
223 public:
225
226 using execution_space = HostSpace::execution_space;
227 using memory_space = CudaHostPinnedSpace;
228 using device_type = Kokkos::Device<execution_space, memory_space>;
229 using size_type = unsigned int;
230
231 /*--------------------------------*/
232
233 CudaHostPinnedSpace();
234 CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs) = default;
235 CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
236 CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
237 CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
238 ~CudaHostPinnedSpace() = default;
239
241 void* allocate(const size_t arg_alloc_size) const;
242 void* allocate(const char* arg_label, const size_t arg_alloc_size,
243 const size_t arg_logical_size = 0) const;
244
246 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
247 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
248 const size_t arg_alloc_size,
249 const size_t arg_logical_size = 0) const;
250
251 private:
252 template <class, class, class, class>
254 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
255 const size_t arg_logical_size = 0,
256 const Kokkos::Tools::SpaceHandle =
257 Kokkos::Tools::make_space_handle(name())) const;
258 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
259 const size_t arg_alloc_size,
260 const size_t arg_logical_size = 0,
261 const Kokkos::Tools::SpaceHandle =
262 Kokkos::Tools::make_space_handle(name())) const;
263
264 public:
266 static constexpr const char* name() { return m_name; }
267
268 private:
269 static constexpr const char* m_name = "CudaHostPinned";
270
271 /*--------------------------------*/
272};
273
274} // namespace Kokkos
275
276/*--------------------------------------------------------------------------*/
277/*--------------------------------------------------------------------------*/
278
279namespace Kokkos {
280namespace Impl {
281
// Dedicated stream used by the asynchronous deep-copy path
// (DeepCopyAsyncCuda below); definition lives in the translation unit.
cudaStream_t cuda_get_deep_copy_stream();

// Lazily constructed Cuda execution-space instance used for deep copies;
// pass initialize = false to query without forcing construction.
const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space(
    bool initialize = true);

// Sanity checks: every memory space must be assignable to itself.
static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,
                                              Kokkos::CudaSpace>::assignable,
              "");
static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaUVMSpace,
                                              Kokkos::CudaUVMSpace>::assignable,
              "");
static_assert(
    Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaHostPinnedSpace,
                                    Kokkos::CudaHostPinnedSpace>::assignable,
    "");
297
298//----------------------------------------
299
// Access traits with HostSpace as the destination's home space:
// which Cuda-related spaces can host execution assign/access/deep-copy.
template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };  // device-global memory: host cannot read
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
  // HostSpace::execution_space != CudaUVMSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };  // UVM is host-readable
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
  enum : bool { assignable = true };
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};
322
323//----------------------------------------
324
// Access traits from the perspective of CudaSpace (device execution).
template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };  // device code cannot read plain host memory
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
  // CudaSpace::execution_space == CudaUVMSpace::execution_space
  enum : bool { assignable = true };
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };  // CudaSpace::execution_space
  enum : bool { deepcopy = true };
};
347
348//----------------------------------------
349// CudaUVMSpace::execution_space == Cuda
350// CudaUVMSpace accessible to both Cuda and Host
351
//----------------------------------------
// CudaUVMSpace::execution_space == Cuda
// CudaUVMSpace accessible to both Cuda and Host

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };  // Cuda cannot access HostSpace
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
  // CudaUVMSpace::execution_space == CudaSpace::execution_space
  // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
  enum : bool { assignable = false };

  // CudaUVMSpace::execution_space can access CudaSpace
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };  // CudaUVMSpace::execution_space
  enum : bool { deepcopy = true };
};
377
378//----------------------------------------
379// CudaHostPinnedSpace::execution_space == HostSpace::execution_space
380// CudaHostPinnedSpace accessible to both Cuda and Host
381
//----------------------------------------
// CudaHostPinnedSpace::execution_space == HostSpace::execution_space
// CudaHostPinnedSpace accessible to both Cuda and Host

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };  // Cannot access from Cuda
  enum : bool { accessible = true };   // CudaHostPinnedSpace::execution_space
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
  enum : bool { assignable = false };  // Cannot access from Host
  enum : bool { accessible = false };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
  enum : bool { assignable = false };  // different execution_space
  enum : bool { accessible = true };   // same accessibility
  enum : bool { deepcopy = true };
};
402
403//----------------------------------------
404
405} // namespace Impl
406} // namespace Kokkos
407
408/*--------------------------------------------------------------------------*/
409/*--------------------------------------------------------------------------*/
410
411namespace Kokkos {
412namespace Impl {
413
// Asynchronous deep copy; implementation elsewhere — presumably issued on
// the stream from cuda_get_deep_copy_stream(), confirm in the .cpp.
void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);

// Base DeepCopy specializations for the Cuda execution space.  The
// constructor performs the copy; definitions live in the translation unit.
// All other Cuda-related space pairs below delegate to these three.
template <>
struct DeepCopy<CudaSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<CudaSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<HostSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};
433
// Delegating specializations: every remaining (dst, src) pair over the Cuda
// execution space forwards to one of the three base specializations above.
// UVM and host-pinned memory are mapped onto the base pair whose
// accessibility matches (UVM -> CudaSpace side, pinned -> host-reachable
// side, per the MemorySpaceAccess traits above).

template <>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};
553
554template <class ExecutionSpace>
555struct DeepCopy<CudaSpace, CudaSpace, ExecutionSpace> {
556 inline DeepCopy(void* dst, const void* src, size_t n) {
557 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
558 }
559
560 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
561 size_t n) {
562 exec.fence();
563 DeepCopyAsyncCuda(dst, src, n);
564 }
565};
566
567template <class ExecutionSpace>
568struct DeepCopy<CudaSpace, HostSpace, ExecutionSpace> {
569 inline DeepCopy(void* dst, const void* src, size_t n) {
570 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
571 }
572
573 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
574 size_t n) {
575 exec.fence();
576 DeepCopyAsyncCuda(dst, src, n);
577 }
578};
579
// Remaining (dst, src) pairs over an arbitrary execution space.  Pattern for
// each: the no-instance constructor performs a blocking copy by delegating
// to a Cuda-instance base specialization; the instance constructor fences
// the given execution space first, then calls DeepCopyAsyncCuda.  UVM maps
// to the CudaSpace side and host-pinned to the HostSpace side of the base
// pair, consistent with the MemorySpaceAccess traits above.

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};
748
749} // namespace Impl
750} // namespace Kokkos
751
752//----------------------------------------------------------------------------
753//----------------------------------------------------------------------------
754
755namespace Kokkos {
756namespace Impl {
757
// Tracked-allocation record for CudaSpace.  Inherits the host-inaccessible
// variant of the common record (the header lives in device memory and cannot
// be dereferenced from host code).
template <>
class SharedAllocationRecord<Kokkos::CudaSpace, void>
    : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
 private:
  friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;
  friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
  friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;

  using RecordBase = SharedAllocationRecord<void, void>;
  using base_t =
      HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;

  // Records are non-copyable bookkeeping objects.
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  // Create a texture object covering [alloc_ptr, alloc_ptr + alloc_size)
  // viewed as elements of size sizeof_alias; defined in the .cpp.
  static ::cudaTextureObject_t attach_texture_object(
      const unsigned sizeof_alias, void* const alloc_ptr,
      const size_t alloc_size);

#ifdef KOKKOS_ENABLE_DEBUG
  // Root of the debug-mode record tracking list.
  static RecordBase s_root_record;
#endif

  ::cudaTextureObject_t m_tex_obj = 0;  // 0 == not yet attached (lazy init)
  const Kokkos::CudaSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);

 public:
  // Lazily create (first call) and return the texture object for this
  // allocation.  Only int, ::int2, and ::int4 alias types are accepted.
  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      m_tex_obj = attach_texture_object(sizeof(AliasType),
                                        (void*)RecordBase::m_alloc_ptr,
                                        RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }

  // Offset of 'ptr' from the allocation base, in AliasType elements.
  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // Texture object is attached to the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }
};
817
// Tracked-allocation record for CudaUVMSpace.  Uses the ordinary (host-
// accessible) common record since UVM memory is host-readable.
template <>
class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
    : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
 private:
  friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;

  using base_t     = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
  using RecordBase = SharedAllocationRecord<void, void>;

  // Records are non-copyable bookkeeping objects.
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  // Root of the record tracking list for this space.
  static RecordBase s_root_record;

  ::cudaTextureObject_t m_tex_obj = 0;  // 0 == not yet attached (lazy init)
  const Kokkos::CudaUVMSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);

 public:
  // Lazily create (first call) and return the texture object for this
  // allocation, reusing the CudaSpace record's helper.  Only int, ::int2,
  // and ::int4 alias types are accepted.
  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
          attach_texture_object(sizeof(AliasType),
                                (void*)RecordBase::m_alloc_ptr,
                                RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }

  // Offset of 'ptr' from the allocation base, in AliasType elements.
  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // Texture object is attached to the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }
};
869
// Tracked-allocation record for CudaHostPinnedSpace.  Host-accessible; no
// texture-object support (pinned memory lives on the host side).
template <>
class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
    : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
 private:
  friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;

  using RecordBase = SharedAllocationRecord<void, void>;
  using base_t     = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;

  // Records are non-copyable bookkeeping objects.
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  // Root of the record tracking list for this space.
  static RecordBase s_root_record;

  const Kokkos::CudaHostPinnedSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);
};
895
896} // namespace Impl
897} // namespace Kokkos
898
899//----------------------------------------------------------------------------
900//----------------------------------------------------------------------------
901
902#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
903#endif /* #define KOKKOS_CUDASPACE_HPP */
// Trailing doxygen tooltip residue from documentation extraction, preserved
// as comments so the header remains compilable:
//   LogicalMemorySpace is a space that is identical to another space, but
//   differentiable by name and template argument.
//   static constexpr const char* name() — Return Name of the MemorySpace.
//   Memory management for host memory.
//   bool available() — Query if hwloc is available.
//   Access relationship between DstMemorySpace and SrcMemorySpace.